From 42e3adf834db7960562303eeb099d9e5a9bdd0b6 Mon Sep 17 00:00:00 2001 From: tompng Date: Mon, 15 Jun 2026 19:49:37 +0900 Subject: [PATCH] Simplify element.attributes structure Change REXML::Attributes internal structure, Refactor ATTLIST handling in Attributes. Fix inconsistent beavior of handling ATTLIST. Stop fuzzy get_attribute/get_attribute_ns search and make it Document Object Model complient. --- lib/rexml/attribute.rb | 11 +- lib/rexml/doctype.rb | 28 ++--- lib/rexml/element.rb | 195 +++++++++++++--------------------- test/test_attributes.rb | 10 ++ test/test_attributes_mixin.rb | 6 +- test/test_contrib.rb | 4 +- test/test_core.rb | 17 ++- 7 files changed, 131 insertions(+), 140 deletions(-) diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index c56732497..b31beb747 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -180,7 +180,7 @@ def element=( element ) # # This method is usually not called directly. def remove - @element.attributes.delete self.name unless @element.nil? + @element.attributes.delete self unless @element.nil? end # Writes this attribute (EG, puts 'key="value"' to the output) @@ -205,6 +205,15 @@ def xpath def document @element&.document end + + # Returns true if this attribute is a namespace declaration, false otherwise. + # "foo" => false + # "xmlns" => true + # "xmlns:foo" => true + # "foo:xmlns" => false + def namespace_declaration? + prefix == 'xmlns' || (prefix.empty? && name == 'xmlns') + end end end #vim:ts=2 sw=2 noexpandtab: diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index d4f0bbe41..f58a39297 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -113,23 +113,27 @@ def node_type end def attributes_of element - rv = [] - each do |child| - child.each do |key,val| - rv << Attribute.new(key,val) - end if child.kind_of? AttlistDecl and child.element_name == element + attribute_declarations_of(element).map do |key, value| + Attribute.new(key, value) end - rv end def attribute_of element, attribute - att_decl = find do |child| - child.kind_of? AttlistDecl and - child.element_name == element and - child.include? attribute + attribute_declarations_of(element)[attribute] + end + + def attribute_declarations_of(name) + raw_attributes = {} + decls = select do |child| + child.kind_of?(AttlistDecl) && child.element_name == name + end + decls.each do |child| + child.each do |key, val| + # First declaration wins + raw_attributes[key] = val unless raw_attributes.key? key + end end - return nil unless att_decl - att_decl[attribute] + raw_attributes end def clone diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index abdb28a72..ef1bf9769 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -2240,15 +2240,10 @@ def length # [REXML::Attribute, bar:att='2'] # [REXML::Attribute, att='<'] # - def each_attribute # :yields: attribute - return to_enum(__method__) unless block_given? - each_value do |val| - if val.kind_of? Attribute - yield val - else - val.each_value { |atr| yield atr } - end - end + # This method doesn't iterate attributes in the DTD. This may be a bug. + # + def each_attribute(&block) # :yields: attribute + each_own_attribute(&block) end # :call-seq: @@ -2273,6 +2268,8 @@ def each_attribute # :yields: attribute # ["bar:att", "2"] # ["att", "<"] # + # This method doesn't iterate attributes in the DTD. This may be a bug. + # def each return to_enum(__method__) unless block_given? each_attribute do |attr| @@ -2300,36 +2297,7 @@ def each # attrs.get_attribute('nosuch') # => nil # def get_attribute( name ) - attr = fetch( name, nil ) - if attr.nil? - return nil if name.nil? - # Look for prefix - name =~ Namespace::NAMESPLIT - prefix, n = $1, $2 - if prefix - attr = fetch( n, nil ) - # check prefix - if attr == nil - elsif attr.kind_of? Attribute - return attr if prefix == attr.prefix - else - attr = attr[ prefix ] - return attr - end - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - attr_val = doctype.attribute_of(expn, name) - return Attribute.new( name, attr_val ) if attr_val - end - return nil - end - if attr.kind_of? Hash - attr = attr[ @element.prefix ] - end - attr + fetch(name, nil) || attlist_attributes&.[](name) end # :call-seq: @@ -2357,8 +2325,7 @@ def get_attribute( name ) # def []=( name, value ) if value.nil? # Delete the named attribute - attr = get_attribute(name) - delete attr + delete name return end @@ -2372,17 +2339,7 @@ def []=( name, value ) value = Attribute.new(name, value) end value.element = @element - old_attr = fetch(value.name, nil) - if old_attr.nil? - store(value.name, value) - elsif old_attr.kind_of? Hash - old_attr[value.prefix] = value - elsif old_attr.prefix != value.prefix - store value.name, {old_attr.prefix => old_attr, - value.prefix => value} - else - store value.name, value - end + store name, value @element end @@ -2398,20 +2355,7 @@ def []=( name, value ) # d.root.attributes.prefixes # => ["x", "y"] # def prefixes - ns = [] - each_attribute do |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - } - end - ns + namespaces.keys - ['xmlns'] end # :call-seq: @@ -2425,17 +2369,8 @@ def prefixes # def namespaces namespaces = {} - each_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - } + each_effective_attribute do |attribute| + namespaces[attribute.name] = attribute.value if attribute.namespace_declaration? end namespaces end @@ -2468,28 +2403,11 @@ def namespaces # attrs.delete(attr) # => # => # attrs.delete(attr) # => # => # - def delete( attribute ) - name = nil - prefix = nil - if attribute.kind_of? Attribute - name = attribute.name - prefix = attribute.prefix - else - attribute =~ Namespace::NAMESPLIT - prefix, name = $1, $2 - prefix = '' unless prefix - end - old = fetch(name, nil) - if old.kind_of? Hash # the supplied attribute is one of many - old.delete(prefix) - if old.size == 1 - repl = nil - old.each_value{|v| repl = v} - store name, repl - end - elsif old # the supplied attribute is a top-level one - super(name) - end + def delete(attribute_or_name) + key = attribute_or_name + key = attribute_or_name.expanded_name if attribute_or_name.kind_of? Attribute + super(key) + @element end @@ -2514,7 +2432,7 @@ def delete( attribute ) # attrs.include?('baz') # => true # def add( attribute ) - self[attribute.name] = attribute + self[attribute.expanded_name] = attribute end alias :<< :add @@ -2527,21 +2445,26 @@ def add( attribute ) # # xml_string = <<-EOT # - # + # # # EOT # d = REXML::Document.new(xml_string) - # ele = d.root.elements['//ele'] # => + # ele = d.root.elements['//ele'] # => # attrs = ele.attributes - # attrs.delete_all('att') # => [att='<'] + # attrs.delete_all('att') # => [foo:att='1', att='<'] + # attrs.each_attribute.map(&:expanded_name) #=> ['foo:other', 'other'] # def delete_all( name ) - rv = [] - each_attribute { |attribute| - rv << attribute if attribute.expanded_name == name - } - rv.each{ |attr| attr.remove } - rv + attributes = each_attribute.select do |attribute| + # For + # delete_all('foo') should not delete xmlns:foo="url" + # because it is a namespace declaration, not a normal attribute. + (!attribute.namespace_declaration? && attribute.name == name) || attribute.expanded_name == name + end + attributes.each do |attribute| + delete attribute.expanded_name + end + attributes end # :call-seq: @@ -2562,17 +2485,51 @@ def delete_all( name ) # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil # def get_attribute_ns(namespace, name) - result = nil - each_attribute() { |attribute| - if name == attribute.name && - namespace == attribute.namespace() && - ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) - # foo will match xmlns:foo, but only if foo isn't also an attribute - result = attribute if !result or !namespace.empty? or - !attribute.fully_expanded_name.index(':') + each_effective_attribute.find do |attribute| + if attribute.namespace_declaration? + # namespace declarations are not considered as attributes in this method. + # For example: + # Both elem1.get_attribute_ns('', 'foo') and elem2.get_attribute_ns('bar', 'foo') + # should not match. + false + elsif namespace.empty? && !attribute.prefix.empty? + # If prefix is present, namespace url shouldn't be empty, so it never matches. + # For example, , even if attribute.namespace returns '', + # it just means that namespace lookup failed, it shouldn't match. + false + else + name == attribute.name && namespace == attribute.namespace() end - } - result + end + end + + private + + # Iterates over the attributes of the element and those in the DTD. + # If the element has an attribute with the same name as one in the DTD, + # the element's attribute takes precedence. + def each_effective_attribute + return to_enum(__method__) unless block_given? + attr = attlist_attributes + attr = attr ? attr.merge(self) : self + attr.each_value do |attribute| + yield attribute + end + end + + alias each_own_attribute each_value + private :each_own_attribute + + def attlist_attributes + doctype = @element.document&.doctype + if doctype + expn = @element.is_a?(Document) ? doctype.name : @element.expanded_name + doctype.attribute_declarations_of(expn).map do |key, value| + attr = Attribute.new(key, value) + attr.element = @element + [key, attr] + end.to_h + end end end end diff --git a/test/test_attributes.rb b/test/test_attributes.rb index 09fde4422..af42f06ff 100644 --- a/test/test_attributes.rb +++ b/test/test_attributes.rb @@ -74,6 +74,16 @@ def test_delete assert_equal '4', doc.root.attributes['z:foo'] end + def test_delete_all + doc = Document.new("") + doc.root.attributes.delete_all 'x' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'x:y' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'xmlns:y' + assert_equal ['xmlns:x', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + end + def test_prefixes doc = Document.new("") prefixes = doc.root.attributes.prefixes diff --git a/test/test_attributes_mixin.rb b/test/test_attributes_mixin.rb index 2b9108cbc..e4d5841cb 100644 --- a/test/test_attributes_mixin.rb +++ b/test/test_attributes_mixin.rb @@ -5,8 +5,10 @@ class TestAttributes < Test::Unit::TestCase def setup @ns_a = "urn:x-test:a" @ns_b = "urn:x-test:b" + @ns_default = "urn:x-test:default" element_string = <<-"XMLEND" - Dummy title' anElement = anElement = aDoc.elements[1] - elementAttrPrefix = anElement.attributes.get_attribute('content').prefix + elementAttrPrefix = anElement.attributes.get_attribute('tpl:content').prefix aClone = anElement.clone - cloneAttrPrefix = aClone.attributes.get_attribute('content').prefix + cloneAttrPrefix = aClone.attributes.get_attribute('tpl:content').prefix assert_equal( elementAttrPrefix , cloneAttrPrefix ) end diff --git a/test/test_core.rb b/test/test_core.rb index beaaeed76..d4962507d 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -889,16 +889,25 @@ def test_attlist_decl ]> EOL assert_equal '', doc.doctype.children[0].to_s.gsub(/\s+/, " ") assert_equal 'gobble', doc.root.attributes['bar'] + assert_equal 'gobble', doc.root.attributes.get_attribute('bar').value + assert_equal 'three', doc.root.attributes.get_attribute('one:baz').value + assert_equal 'three', doc.root.attributes.get_attribute_ns('two', 'baz').value assert_equal 'xxx', doc.root.elements[2].namespace assert_equal 'two', doc.root.elements[1].namespace assert_equal 'foo', doc.root.namespace + # Not used in REXML internal any more, keep for backward compatibility + assert_equal 'gobble', doc.doctype.attribute_of('a', 'bar') + expected_attributes = [Attribute.new('bar', 'gobble'), Attribute.new('xmlns:one', 'two'), Attribute.new('one:baz', 'three')] + assert_equal expected_attributes, doc.doctype.attributes_of('a') + doc = Document.new <<~EOL ' ) expected = { - "inkscape" => attribute("xmlns:inkscape", + "xmlns:inkscape" => attribute("xmlns:inkscape", "http://www.inkscape.org/namespaces/inkscape"), - "version" => { - "inkscape" => attribute("inkscape:version", "0.44"), - "" => attribute("version", "1.0"), - }, + "inkscape:version" => attribute("inkscape:version", "0.44"), + "version" => attribute("version", "1.0"), } assert_equal(expected, doc.root.attributes.to_h)