From d99e29cbea6752196411747ecdbdf865b06631a5 Mon Sep 17 00:00:00 2001 From: Maximilian Heise <7600641+maxheise@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:03:50 +0200 Subject: [PATCH] epub: free the parsed document in its owning load functions, not in set_xml_root_node backend/epub/epub-document.c parses container.xml, the OPF package document, and the per-page XHTML into the file-scope global xmldocument, and releases it with the helper xml_free_doc(), which frees the document and sets the global back to NULL. open_xml_document() assigns xmldocument = xmlParseFile(...) without freeing any previous value, and a document load opens these documents in series, so whatever a parse leaves behind has to be released before the next open_xml_document() overwrites the only pointer. xmldocument is not freed at document finalize, so a parse tree left unreleased leaks; a later open_xml_document() then overwrites the only pointer to it. The values these functions return (a content URI, the content list, a toc file name, a stylesheet href) are independent allocations: g_filename_to_uri results, g_malloc'd list nodes, and xmlGetProp copies that do not point into the document tree, so each owner can free the document before returning its result. The ownership rule this follows: the function that opens a document owns its lifetime and frees it on every exit; set_xml_root_node() only validates the root element and must not free. set_xml_root_node() previously broke that rule. On the no-root-element branch it called a bare xmlFreeDoc(xmldocument), freeing the global as a side effect of a function named for validation. That free is removed here; set_xml_root_node() now frees nothing; it only sets the global xmlroot and reports whether the root matches. Freeing inside it would not even be safe: two callers pass a non-NULL root name and ignore the return value (setup_document_index() with "ncx", epub_document_get_info() with "package"), then call xml_get_pointer_to_node(), which dereferences xmlroot->name. Freeing the tree inside the helper on a root-name mismatch would leave those callers dereferencing freed memory. Leaving the release to the owners also keeps the failure mode benign: a caller that omits a free leaks rather than uses freed memory. With the helper no longer freeing, each owner frees the document on the exits that previously leaked it. set_xml_root_node()'s callers split two ways: - get_uri_to_content() and setup_document_content_list() check the return value and bail; they now free before their bail-out returns ("container file is corrupt"; "content file is invalid"). Their other early error returns also free ("epub file is invalid or corrupt" with no rootfile node, "epub file is corrupt, no container" with no full-path attribute, "could not retrieve container file" when g_filename_to_uri fails; "epub file has no spine", "epub file has no manifest", and the errorflag return for a missing or broken manifest item). - setup_document_index() ("ncx") and epub_document_get_info() ("package") ignore the return value, but each already calls xml_free_doc() at its natural end and has no early return after opening the document, so it keeps a valid tree through its lookups and frees it at the end. They need no change. The remaining owners with an early return that bypassed their end-of-function free also free now: - get_toc_file_name(): the early return taken when the spine has no "toc" attribute (ncx == NULL). The in-code comment notes this is the EPUB3 case with no NCX, where the nav file is used instead, so it is reached for ordinary EPUB3 documents that have no NCX, not only for malformed input (ncx == NULL can also arise from malformed package data). It is called unconditionally from setup_document_index() during the load. - epub_document_get_alternate_stylesheet(): the success path, taken when a alternate stylesheet is present. The href is read with xml_get_data_from_node(..., XML_ATTRIBUTE, ...), which calls xmlGetProp() and returns a fresh copy, so the document is freed first and that copy is returned afterwards. xml_free_doc() sets xmldocument to NULL after freeing, so the added calls are fine on every path; successful return values are unchanged (the same strings and lists), and only the point at which the parsed tree is released changes. --- backend/epub/epub-document.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c index 57d626c4..c41ebd1a 100644 --- a/backend/epub/epub-document.c +++ b/backend/epub/epub-document.c @@ -487,7 +487,6 @@ set_xml_root_node(xmlChar* rootname) if (xmlroot == NULL) { - xmlFreeDoc(xmldocument); return FALSE; } @@ -896,6 +895,7 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document) EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("container file is corrupt")); + xml_free_doc(); return NULL ; } @@ -906,6 +906,7 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document) EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("epub file is invalid or corrupt")); + xml_free_doc(); return NULL ; } @@ -916,6 +917,7 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document) EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("epub file is corrupt, no container")); + xml_free_doc(); return NULL ; } @@ -958,6 +960,7 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document) EV_DOCUMENT_ERROR_INVALID, _("could not retrieve container file")); } + xml_free_doc(); return NULL ; } xml_free_doc(); @@ -1010,6 +1013,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("content file is invalid")); + xml_free_doc(); return FALSE ; } @@ -1019,6 +1023,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("epub file has no spine")); + xml_free_doc(); return FALSE ; } @@ -1028,6 +1033,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu EV_DOCUMENT_ERROR, EV_DOCUMENT_ERROR_INVALID, _("epub file has no manifest")); + xml_free_doc(); return FALSE ; } @@ -1122,6 +1128,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu } /*free any nodes that were set up and return empty*/ g_list_free_full(newlist, (GDestroyNotify)free_tree_nodes); + xml_free_doc(); return NULL; } @@ -1169,6 +1176,7 @@ get_toc_file_name(gchar *containeruri) /*In an epub3, there is sometimes no toc, and we need to then use the nav file for this.*/ if (ncx == NULL) { + xml_free_doc(); return NULL; } @@ -1556,7 +1564,9 @@ epub_document_get_alternate_stylesheet(gchar *docuri) xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"class",(xmlChar*)"night"); if (xmlretval != NULL) { - return (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href"); + gchar *stylesheet = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href"); + xml_free_doc(); + return stylesheet; } xml_free_doc(); return NULL;