From 20017eea807e8fa386aa5c79ae779004d8b366dd Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 25 Jun 2024 11:26:33 +0900 Subject: [PATCH] Add 3.3.1 entry Backport from https://github.com/ruby/rexml/tree/v3.3.1/lib/rexml --- lib/rexml/attlistdecl.rb | 4 +- lib/rexml/attribute.rb | 54 +- lib/rexml/cdata.rb | 2 +- lib/rexml/child.rb | 2 +- lib/rexml/comment.rb | 2 +- lib/rexml/doctype.rb | 49 +- lib/rexml/document.rb | 256 ++- lib/rexml/dtd/attlistdecl.rb | 2 +- lib/rexml/dtd/dtd.rb | 12 +- lib/rexml/dtd/elementdecl.rb | 2 +- lib/rexml/dtd/entitydecl.rb | 2 +- lib/rexml/dtd/notationdecl.rb | 2 +- lib/rexml/element.rb | 2297 +++++++++++++++++----- lib/rexml/entity.rb | 48 +- lib/rexml/formatters/default.rb | 12 +- lib/rexml/formatters/pretty.rb | 6 +- lib/rexml/formatters/transitive.rb | 2 +- lib/rexml/functions.rb | 105 +- lib/rexml/instruction.rb | 32 +- lib/rexml/light/node.rb | 12 +- lib/rexml/namespace.rb | 29 +- lib/rexml/node.rb | 18 +- lib/rexml/output.rb | 2 +- lib/rexml/parent.rb | 2 +- lib/rexml/parseexception.rb | 1 + lib/rexml/parsers/baseparser.rb | 551 +++--- lib/rexml/parsers/lightparser.rb | 6 +- lib/rexml/parsers/pullparser.rb | 6 +- lib/rexml/parsers/sax2parser.rb | 8 +- lib/rexml/parsers/streamparser.rb | 2 +- lib/rexml/parsers/treeparser.rb | 27 +- lib/rexml/parsers/ultralightparser.rb | 4 +- lib/rexml/parsers/xpathparser.rb | 332 ++-- lib/rexml/quickpath.rb | 4 +- lib/rexml/rexml.rb | 55 +- lib/rexml/source.rb | 223 ++- lib/rexml/syncenumerator.rb | 33 - lib/rexml/text.rb | 76 +- lib/rexml/undefinednamespaceexception.rb | 2 +- lib/rexml/validation/relaxng.rb | 4 +- lib/rexml/validation/validation.rb | 2 +- lib/rexml/xmldecl.rb | 40 +- lib/rexml/xpath.rb | 16 +- lib/rexml/xpath_parser.rb | 1052 ++++++---- 44 files changed, 3677 insertions(+), 1721 deletions(-) delete mode 100644 lib/rexml/syncenumerator.rb diff --git a/lib/rexml/attlistdecl.rb b/lib/rexml/attlistdecl.rb index dc1d2ad..44a91d6 100644 --- a/lib/rexml/attlistdecl.rb +++ 
b/lib/rexml/attlistdecl.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false #vim:ts=2 sw=2 noexpandtab: -require 'rexml/child' -require 'rexml/source' +require_relative 'child' +require_relative 'source' module REXML # This class needs: diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index ca5984e..11893a9 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -1,6 +1,6 @@ -# frozen_string_literal: false -require "rexml/namespace" -require 'rexml/text' +# frozen_string_literal: true +require_relative "namespace" +require_relative 'text' module REXML # Defines an Element Attribute; IE, a attribute=value pair, as in: @@ -13,9 +13,6 @@ module REXML # The element to which this attribute belongs attr_reader :element - # The normalized value of this attribute. That is, the attribute with - # entities intact. - attr_writer :normalized PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um @@ -67,15 +64,11 @@ module REXML # e.add_attribute( "nsa:a", "aval" ) # e.add_attribute( "b", "bval" ) # e.attributes.get_attribute( "a" ).prefix # -> "nsa" - # e.attributes.get_attribute( "b" ).prefix # -> "elns" + # e.attributes.get_attribute( "b" ).prefix # -> "" # a = Attribute.new( "x", "y" ) # a.prefix # -> "" def prefix - pf = super - if pf == "" - pf = @element.prefix if @element - end - pf + super end # Returns the namespace URL, if defined, or nil otherwise @@ -86,9 +79,26 @@ module REXML # e.add_attribute("nsx:a", "c") # e.attribute("ns:a").namespace # => "http://url" # e.attribute("nsx:a").namespace # => nil + # + # This method always returns "" for no namespace attribute. Because + # the default namespace doesn't apply to attribute names. 
+ # + # From https://www.w3.org/TR/xml-names/#uniqAttrs + # + # > the default namespace does not apply to attribute names + # + # e = REXML::Element.new("el") + # e.add_namespace("", "http://example.com/") + # e.namespace # => "http://example.com/" + # e.add_attribute("a", "b") + # e.attribute("a").namespace # => "" def namespace arg=nil arg = prefix if arg.nil? - @element.namespace arg + if arg == "" + "" + else + @element.namespace(arg) + end end # Returns true if other is an Attribute and has the same name and value, @@ -109,10 +119,13 @@ module REXML # b = Attribute.new( "ns:x", "y" ) # b.to_string # -> "ns:x='y'" def to_string + value = to_s if @element and @element.context and @element.context[:attribute_quote] == :quote - %Q^#@expanded_name="#{to_s().gsub(/"/, '"')}"^ + value = value.gsub('"', '"') if value.include?('"') + %Q^#@expanded_name="#{value}"^ else - "#@expanded_name='#{to_s().gsub(/'/, ''')}'" + value = value.gsub("'", ''') if value.include?("'") + "#@expanded_name='#{value}'" end end @@ -128,7 +141,6 @@ module REXML return @normalized if @normalized @normalized = Text::normalize( @unnormalized, doctype ) - @unnormalized = nil @normalized end @@ -137,10 +149,16 @@ module REXML def value return @unnormalized if @unnormalized @unnormalized = Text::unnormalize( @normalized, doctype ) - @normalized = nil @unnormalized end + # The normalized value of this attribute. That is, the attribute with + # entities intact. 
+ def normalized=(new_normalized) + @normalized = new_normalized + @unnormalized = nil + end + # Returns a copy of this attribute def clone Attribute.new self @@ -177,7 +195,7 @@ module REXML end def inspect - rv = "" + rv = +"" write( rv ) rv end diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index 2238446..997f5a0 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/text" +require_relative "text" module REXML class CData < Text diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb index d23451e..cc6e9a4 100644 --- a/lib/rexml/child.rb +++ b/lib/rexml/child.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/node" +require_relative "node" module REXML ## diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb index 822fe0d..52c58b4 100644 --- a/lib/rexml/comment.rb +++ b/lib/rexml/comment.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "child" module REXML ## diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index cb9bf57..f359048 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -1,20 +1,25 @@ # frozen_string_literal: false -require "rexml/parent" -require "rexml/parseexception" -require "rexml/namespace" -require 'rexml/entity' -require 'rexml/attlistdecl' -require 'rexml/xmltokens' +require_relative "parent" +require_relative "parseexception" +require_relative "namespace" +require_relative 'entity' +require_relative 'attlistdecl' +require_relative 'xmltokens' module REXML class ReferenceWriter def initialize(id_type, public_id_literal, - system_literal) + system_literal, + context=nil) @id_type = id_type @public_id_literal = public_id_literal @system_literal = system_literal - @default_quote = "\"" + if context and context[:prologue_quote] == :apostrophe + @default_quote = "'" + else + @default_quote = "\"" + end end def write(output) @@ -150,7 +155,8 @@ module REXML if @external_id reference_writer = 
ReferenceWriter.new(@external_id, @long_name, - @uri) + @uri, + context) reference_writer.write(output) end unless @children.empty? @@ -165,7 +171,11 @@ module REXML end def context - @parent.context + if @parent + @parent.context + else + nil + end end def entity( name ) @@ -187,7 +197,7 @@ module REXML when "SYSTEM" nil when "PUBLIC" - strip_quotes(@long_name) + @long_name end end @@ -197,9 +207,9 @@ module REXML def system case @external_id when "SYSTEM" - strip_quotes(@long_name) + @long_name when "PUBLIC" - @uri.kind_of?(String) ? strip_quotes(@uri) : nil + @uri.kind_of?(String) ? @uri : nil end end @@ -221,15 +231,6 @@ module REXML notation_decl.name == name } end - - private - - # Method contributed by Henrik Martensson - def strip_quotes(quoted_string) - quoted_string =~ /^[\'\"].*[\'\"]$/ ? - quoted_string[1, quoted_string.length-2] : - quoted_string - end end # We don't really handle any of these since we're not a validating @@ -287,8 +288,10 @@ module REXML end def to_s + context = nil + context = parent.context if parent notation = "" notation diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 806bc49..b1caa02 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -1,38 +1,94 @@ # frozen_string_literal: false -require "rexml/security" -require "rexml/element" -require "rexml/xmldecl" -require "rexml/source" -require "rexml/comment" -require "rexml/doctype" -require "rexml/instruction" -require "rexml/rexml" -require "rexml/parseexception" -require "rexml/output" -require "rexml/parsers/baseparser" -require "rexml/parsers/streamparser" -require "rexml/parsers/treeparser" +require_relative "security" +require_relative "element" +require_relative "xmldecl" +require_relative "source" +require_relative "comment" +require_relative "doctype" +require_relative "instruction" +require_relative "rexml" +require_relative "parseexception" +require_relative "output" +require_relative "parsers/baseparser" +require_relative 
"parsers/streamparser" +require_relative "parsers/treeparser" module REXML - # Represents a full XML document, including PIs, a doctype, etc. A - # Document has a single child that can be accessed by root(). - # Note that if you want to have an XML declaration written for a document - # you create, you must add one; REXML documents do not write a default - # declaration for you. See |DECLARATION| and |write|. + # Represents an XML document. + # + # A document may have: + # + # - A single child that may be accessed via method #root. + # - An XML declaration. + # - A document type. + # - Processing instructions. + # + # == In a Hurry? + # + # If you're somewhat familiar with XML + # and have a particular task in mind, + # you may want to see the + # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html], + # and in particular, the + # {tasks page for documents}[../doc/rexml/tasks/tocs/document_toc_rdoc.html]. + # class Document < Element - # A convenient default XML declaration. If you want an XML declaration, - # the easiest way to add one is mydoc << Document::DECLARATION - # +DEPRECATED+ - # Use: mydoc << XMLDecl.default + # A convenient default XML declaration. Use: + # + # mydoc << XMLDecl.default + # DECLARATION = XMLDecl.default - # Constructor - # @param source if supplied, must be a Document, String, or IO. - # Documents have their context and Element attributes cloned. - # Strings are expected to be valid XML documents. IOs are expected - # to be sources of valid XML documents. - # @param context if supplied, contains the context of the document; - # this should be a Hash. + # :call-seq: + # new(string = nil, context = {}) -> new_document + # new(io_stream = nil, context = {}) -> new_document + # new(document = nil, context = {}) -> new_document + # + # Returns a new \REXML::Document object. 
+ # + # When no arguments are given, + # returns an empty document: + # + # d = REXML::Document.new + # d.to_s # => "" + # + # When argument +string+ is given, it must be a string + # containing a valid XML document: + # + # xml_string = 'FooBar' + # d = REXML::Document.new(xml_string) + # d.to_s # => "FooBar" + # + # When argument +io_stream+ is given, it must be an \IO object + # that is opened for reading, and when read must return a valid XML document: + # + # File.write('t.xml', xml_string) + # d = File.open('t.xml', 'r') do |io| + # REXML::Document.new(io) + # end + # d.to_s # => "FooBar" + # + # When argument +document+ is given, it must be an existing + # document object, whose context and attributes (but not children) + # are cloned into the new document: + # + # d = REXML::Document.new(xml_string) + # d.children # => [ ... ] + # d.context = {raw: :all, compress_whitespace: :all} + # d.add_attributes({'bar' => 0, 'baz' => 1}) + # d1 = REXML::Document.new(d) + # d1.children # => [] + # d1.context # => {:raw=>:all, :compress_whitespace=>:all} + # d1.attributes # => {"bar"=>bar='0', "baz"=>baz='1'} + # + # When argument +context+ is given, it must be a hash + # containing context entries for the document; + # see {Element Context}[../doc/rexml/context_rdoc.html]: + # + # context = {raw: :all, compress_whitespace: :all} + # d = REXML::Document.new(xml_string, context) + # d.context # => {:raw=>:all, :compress_whitespace=>:all} + # def initialize( source = nil, context = {} ) @entity_expansion_count = 0 super() @@ -46,26 +102,71 @@ module REXML end end + # :call-seq: + # node_type -> :document + # + # Returns the symbol +:document+. + # def node_type :document end - # Should be obvious + # :call-seq: + # clone -> new_document + # + # Returns the new document resulting from executing + # Document.new(self). See Document.new. 
+ # def clone Document.new self end - # According to the XML spec, a root node has no expanded name + # :call-seq: + # expanded_name -> empty_string + # + # Returns an empty string. + # def expanded_name '' #d = doc_type #d ? d.name : "UNDEFINED" end - alias :name :expanded_name - # We override this, because XMLDecls and DocTypes must go at the start - # of the document + # :call-seq: + # add(xml_decl) -> self + # add(doc_type) -> self + # add(object) -> self + # + # Adds an object to the document; returns +self+. + # + # When argument +xml_decl+ is given, + # it must be an REXML::XMLDecl object, + # which becomes the XML declaration for the document, + # replacing the previous XML declaration if any: + # + # d = REXML::Document.new + # d.xml_decl.to_s # => "" + # d.add(REXML::XMLDecl.new('2.0')) + # d.xml_decl.to_s # => "" + # + # When argument +doc_type+ is given, + # it must be an REXML::DocType object, + # which becomes the document type for the document, + # replacing the previous document type, if any: + # + # d = REXML::Document.new + # d.doctype.to_s # => "" + # d.add(REXML::DocType.new('foo')) + # d.doctype.to_s # => "" + # + # When argument +object+ (not an REXML::XMLDecl or REXML::DocType object) + # is given it is added as the last child: + # + # d = REXML::Document.new + # d.add(REXML::Element.new('foo')) + # d.to_s # => "" + # def add( child ) if child.kind_of? XMLDecl if @children[0].kind_of? XMLDecl @@ -99,49 +200,108 @@ module REXML end alias :<< :add + # :call-seq: + # add_element(name_or_element = nil, attributes = nil) -> new_element + # + # Adds an element to the document by calling REXML::Element.add_element: + # + # REXML::Element.add_element(name_or_element, attributes) def add_element(arg=nil, arg2=nil) rv = super raise "attempted adding second root element to document" if @elements.size > 1 rv end - # @return the root Element of the document, or nil if this document - # has no children. 
+ # :call-seq: + # root -> root_element or nil + # + # Returns the root element of the document, if it exists, otherwise +nil+: + # + # d = REXML::Document.new('') + # d.root # => + # d = REXML::Document.new('') + # d.root # => nil + # def root elements[1] #self #@children.find { |item| item.kind_of? Element } end - # @return the DocType child of the document, if one exists, - # and nil otherwise. + # :call-seq: + # doctype -> doc_type or nil + # + # Returns the DocType object for the document, if it exists, otherwise +nil+: + # + # d = REXML::Document.new('') + # d.doctype.class # => REXML::DocType + # d = REXML::Document.new('') + # d.doctype.class # => nil + # def doctype @children.find { |item| item.kind_of? DocType } end - # @return the XMLDecl of this document; if no XMLDecl has been - # set, the default declaration is returned. + # :call-seq: + # xml_decl -> xml_decl + # + # Returns the XMLDecl object for the document, if it exists, + # otherwise the default XMLDecl object: + # + # d = REXML::Document.new('') + # d.xml_decl.class # => REXML::XMLDecl + # d.xml_decl.to_s # => "" + # d = REXML::Document.new('') + # d.xml_decl.class # => REXML::XMLDecl + # d.xml_decl.to_s # => "" + # def xml_decl rv = @children[0] return rv if rv.kind_of? XMLDecl @children.unshift(XMLDecl.default)[0] end - # @return the XMLDecl version of this document as a String. - # If no XMLDecl has been set, returns the default version. + # :call-seq: + # version -> version_string + # + # Returns the XMLDecl version of this document as a string, + # if it has been set, otherwise the default version: + # + # d = REXML::Document.new('') + # d.version # => "2.0" + # d = REXML::Document.new('') + # d.version # => "1.0" + # def version xml_decl().version end - # @return the XMLDecl encoding of this document as an - # Encoding object. - # If no XMLDecl has been set, returns the default encoding. 
+ # :call-seq: + # encoding -> encoding_string + # + # Returns the XMLDecl encoding of the document, + # if it has been set, otherwise the default encoding: + # + # d = REXML::Document.new('') + # d.encoding # => "UTF-16" + # d = REXML::Document.new('') + # d.encoding # => "UTF-8" + # def encoding xml_decl().encoding end - # @return the XMLDecl standalone value of this document as a String. - # If no XMLDecl has been set, returns the default setting. + # :call-seq: + # stand_alone? + # + # Returns the XMLDecl standalone value of the document as a string, + # if it has been set, otherwise the default standalone value: + # + # d = REXML::Document.new('') + # d.stand_alone? # => "yes" + # d = REXML::Document.new('') + # d.stand_alone? # => nil + # def stand_alone? xml_decl().stand_alone? end @@ -226,7 +386,7 @@ module REXML end formatter = if indent > -1 if transitive - require "rexml/formatters/transitive" + require_relative "formatters/transitive" REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) diff --git a/lib/rexml/dtd/attlistdecl.rb b/lib/rexml/dtd/attlistdecl.rb index 32847da..1326cb2 100644 --- a/lib/rexml/dtd/attlistdecl.rb +++ b/lib/rexml/dtd/attlistdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class AttlistDecl < Child diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb index 927d5d8..8b0f2d7 100644 --- a/lib/rexml/dtd/dtd.rb +++ b/lib/rexml/dtd/dtd.rb @@ -1,10 +1,10 @@ # frozen_string_literal: false -require "rexml/dtd/elementdecl" -require "rexml/dtd/entitydecl" -require "rexml/comment" -require "rexml/dtd/notationdecl" -require "rexml/dtd/attlistdecl" -require "rexml/parent" +require_relative "elementdecl" +require_relative "entitydecl" +require_relative "../comment" +require_relative "notationdecl" +require_relative "attlistdecl" +require_relative "../parent" module REXML module DTD diff --git 
a/lib/rexml/dtd/elementdecl.rb b/lib/rexml/dtd/elementdecl.rb index 119fd41..20ed023 100644 --- a/lib/rexml/dtd/elementdecl.rb +++ b/lib/rexml/dtd/elementdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class ElementDecl < Child diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb index 45707e2..312df65 100644 --- a/lib/rexml/dtd/entitydecl.rb +++ b/lib/rexml/dtd/entitydecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class EntityDecl < Child diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb index cfdf0b9..04a9b08 100644 --- a/lib/rexml/dtd/notationdecl.rb +++ b/lib/rexml/dtd/notationdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class NotationDecl < Child diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index ac9b108..a5808d7 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -1,23 +1,273 @@ # frozen_string_literal: false -require "rexml/parent" -require "rexml/namespace" -require "rexml/attribute" -require "rexml/cdata" -require "rexml/xpath" -require "rexml/parseexception" +require_relative "parent" +require_relative "namespace" +require_relative "attribute" +require_relative "cdata" +require_relative "xpath" +require_relative "parseexception" module REXML - # An implementation note about namespaces: - # As we parse, when we find namespaces we put them in a hash and assign - # them a unique ID. We then convert the namespace prefix for the node - # to the unique ID. This makes namespace lookup much faster for the - # cost of extra memory use. We save the namespace prefix for the - # context node and convert it back when we write it. - @@namespaces = {} - - # Represents a tagged XML element. 
Elements are characterized by - # having children, attributes, and names, and can themselves be - # children. + # An \REXML::Element object represents an XML element. + # + # An element: + # + # - Has a name (string). + # - May have a parent (another element). + # - Has zero or more children + # (other elements, text, CDATA, processing instructions, and comments). + # - Has zero or more siblings + # (other elements, text, CDATA, processing instructions, and comments). + # - Has zero or more named attributes. + # + # == In a Hurry? + # + # If you're somewhat familiar with XML + # and have a particular task in mind, + # you may want to see the + # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html], + # and in particular, the + # {tasks page for elements}[../doc/rexml/tasks/tocs/element_toc_rdoc.html]. + # + # === Name + # + # An element has a name, which is initially set when the element is created: + # + # e = REXML::Element.new('foo') + # e.name # => "foo" + # + # The name may be changed: + # + # e.name = 'bar' + # e.name # => "bar" + # + # + # === \Parent + # + # An element may have a parent. + # + # Its parent may be assigned explicitly when the element is created: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar', e0) + # e1.parent # => ... + # + # Note: the representation of an element always shows the element's name. + # If the element has children, the representation indicates that + # by including an ellipsis (...). + # + # The parent may be assigned explicitly at any time: + # + # e2 = REXML::Element.new('baz') + # e1.parent = e2 + # e1.parent # => + # + # When an element is added as a child, its parent is set automatically: + # + # e1.add_element(e0) + # e0.parent # => ... + # + # For an element that has no parent, method +parent+ returns +nil+. + # + # === Children + # + # An element has zero or more children. + # The children are an ordered collection + # of all objects whose parent is the element itself. 
+ # + # The children may include any combination of elements, text, comments, + # processing instructions, and CDATA. + # (This example keeps things clean by controlling whitespace + # via a +context+ setting.) + # + # xml_string = <<-EOT + # + # + # text 0 + # + # + # + # + # text 1 + # + # + # + # + # EOT + # context = {ignore_whitespace_nodes: :all, compress_whitespace: :all} + # d = REXML::Document.new(xml_string, context) + # root = d.root + # root.children.size # => 10 + # root.each {|child| p "#{child.class}: #{child}" } + # + # Output: + # + # "REXML::Element: " + # "REXML::Text: \n text 0\n " + # "REXML::Comment: comment 0" + # "REXML::Instruction: " + # "REXML::CData: cdata 0" + # "REXML::Element: " + # "REXML::Text: \n text 1\n " + # "REXML::Comment: comment 1" + # "REXML::Instruction: " + # "REXML::CData: cdata 1" + # + # A child may be added using inherited methods + # Parent#insert_before or Parent#insert_after: + # + # xml_string = '' + # d = REXML::Document.new(xml_string) + # root = d.root + # c = d.root[1] # => + # root.insert_before(c, REXML::Element.new('b')) + # root.to_a # => [, , , ] + # + # A child may be replaced using Parent#replace_child: + # + # root.replace_child(c, REXML::Element.new('x')) + # root.to_a # => [, , , ] + # + # A child may be removed using Parent#delete: + # + # x = root[2] # => + # root.delete(x) + # root.to_a # => [, , ] + # + # === Siblings + # + # An element has zero or more siblings, + # which are the other children of the element's parent. + # + # In the example above, element +ele_1+ is between a CDATA sibling + # and a text sibling: + # + # ele_1 = root[5] # => + # ele_1.previous_sibling # => "cdata 0" + # ele_1.next_sibling # => "\n text 1\n " + # + # === \Attributes + # + # An element has zero or more named attributes. 
+ # + # A new element has no attributes: + # + # e = REXML::Element.new('foo') + # e.attributes # => {} + # + # Attributes may be added: + # + # e.add_attribute('bar', 'baz') + # e.add_attribute('bat', 'bam') + # e.attributes.size # => 2 + # e['bar'] # => "baz" + # e['bat'] # => "bam" + # + # An existing attribute may be modified: + # + # e.add_attribute('bar', 'bad') + # e.attributes.size # => 2 + # e['bar'] # => "bad" + # + # An existing attribute may be deleted: + # + # e.delete_attribute('bar') + # e.attributes.size # => 1 + # e['bar'] # => nil + # + # == What's Here + # + # To begin with, what's elsewhere? + # + # \Class \REXML::Element inherits from its ancestor classes: + # + # - REXML::Child + # - REXML::Parent + # + # \REXML::Element itself and its ancestors also include modules: + # + # - {Enumerable}[https://docs.ruby-lang.org/en/master/Enumerable.html] + # - REXML::Namespace + # - REXML::Node + # - REXML::XMLTokens + # + # === Methods for Creating an \Element + # + # ::new:: Returns a new empty element. + # #clone:: Returns a clone of another element. + # + # === Methods for Attributes + # + # {[attribute_name]}[#method-i-5B-5D]:: Returns an attribute value. + # #add_attribute:: Adds a new attribute. + # #add_attributes:: Adds multiple new attributes. + # #attribute:: Returns the attribute value for a given name and optional namespace. + # #delete_attribute:: Removes an attribute. + # + # === Methods for Children + # + # {[index]}[#method-i-5B-5D]:: Returns the child at the given offset. + # #add_element:: Adds an element as the last child. + # #delete_element:: Deletes a child element. + # #each_element:: Calls the given block with each child element. + # #each_element_with_attribute:: Calls the given block with each child element + # that meets given criteria, + # which can include the attribute name. + # #each_element_with_text:: Calls the given block with each child element + # that meets given criteria, + # which can include text. 
+ # #get_elements:: Returns an array of element children that match a given xpath. + # + # === Methods for \Text Children + # + # #add_text:: Adds a text node to the element. + # #get_text:: Returns a text node that meets specified criteria. + # #text:: Returns the text string from the first node that meets specified criteria. + # #texts:: Returns an array of the text children of the element. + # #text=:: Adds, removes, or replaces the first text child of the element + # + # === Methods for Other Children + # + # #cdatas:: Returns an array of the cdata children of the element. + # #comments:: Returns an array of the comment children of the element. + # #instructions:: Returns an array of the instruction children of the element. + # + # === Methods for Namespaces + # + # #add_namespace:: Adds a namespace to the element. + # #delete_namespace:: Removes a namespace from the element. + # #namespace:: Returns the string namespace URI for the element. + # #namespaces:: Returns a hash of all defined namespaces in the element. + # #prefixes:: Returns an array of the string prefixes (names) + # of all defined namespaces in the element + # + # === Methods for Querying + # + # #document:: Returns the document, if any, that the element belongs to. + # #root:: Returns the most distant element (not document) ancestor of the element. + # #root_node:: Returns the most distant ancestor of the element. + # #xpath:: Returns the string xpath to the element + # relative to the most distant parent + # #has_attributes?:: Returns whether the element has attributes. + # #has_elements?:: Returns whether the element has elements. + # #has_text?:: Returns whether the element has text. + # #next_element:: Returns the next sibling that is an element. + # #previous_element:: Returns the previous sibling that is an element. + # #raw:: Returns whether raw mode is set for the element. + # #whitespace:: Returns whether whitespace is respected for the element. 
+ # #ignore_whitespace_nodes:: Returns whether whitespace nodes + # are to be ignored for the element. + # #node_type:: Returns symbol :element. + # + # === One More Method + # + # #inspect:: Returns a string representation of the element. + # + # === Accessors + # + # #elements:: Returns the REXML::Elements object for the element. + # #attributes:: Returns the REXML::Attributes object for the element. + # #context:: Returns or sets the context hash for the element. + # class Element < Parent include Namespace @@ -30,32 +280,42 @@ module REXML # whitespace handling. attr_accessor :context - # Constructor - # arg:: - # if not supplied, will be set to the default value. - # If a String, the name of this object will be set to the argument. - # If an Element, the object will be shallowly cloned; name, - # attributes, and namespaces will be copied. Children will +not+ be - # copied. - # parent:: - # if supplied, must be a Parent, and will be used as - # the parent of this object. - # context:: - # If supplied, must be a hash containing context items. Context items - # include: - # * :respect_whitespace the value of this is :+all+ or an array of - # strings being the names of the elements to respect - # whitespace for. Defaults to :+all+. - # * :compress_whitespace the value can be :+all+ or an array of - # strings being the names of the elements to ignore whitespace on. - # Overrides :+respect_whitespace+. - # * :ignore_whitespace_nodes the value can be :+all+ or an array - # of strings being the names of the elements in which to ignore - # whitespace-only nodes. If this is set, Text nodes which contain only - # whitespace will not be added to the document tree. - # * :raw can be :+all+, or an array of strings being the names of - # the elements to process in raw mode. In raw mode, special - # characters in text is not converted to or from entities. 
+ # :call-seq: + # Element.new(name = 'UNDEFINED', parent = nil, context = nil) -> new_element + # Element.new(element, parent = nil, context = nil) -> new_element + # + # Returns a new \REXML::Element object. + # + # When no arguments are given, + # returns an element with name 'UNDEFINED': + # + # e = REXML::Element.new # => + # e.class # => REXML::Element + # e.name # => "UNDEFINED" + # + # When only argument +name+ is given, + # returns an element of the given name: + # + # REXML::Element.new('foo') # => + # + # When only argument +element+ is given, it must be an \REXML::Element object; + # returns a shallow copy of the given element: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new(e0) # => + # + # When argument +parent+ is also given, it must be an REXML::Parent object: + # + # e = REXML::Element.new('foo', REXML::Parent.new) + # e.parent # => #]> + # + # When argument +context+ is also given, it must be a hash + # representing the context for the element; + # see {Element Context}[../doc/rexml/context_rdoc.html]: + # + # e = REXML::Element.new('foo', nil, {raw: :all}) + # e.context # => {:raw=>:all} + # def initialize( arg = UNDEFINED, parent=nil, context=nil ) super(parent) @@ -74,6 +334,27 @@ module REXML end end + # :call-seq: + # inspect -> string + # + # Returns a string representation of the element. + # + # For an element with no attributes and no children, shows the element name: + # + # REXML::Element.new.inspect # => "" + # + # Shows attributes, if any: + # + # e = REXML::Element.new('foo') + # e.add_attributes({'bar' => 0, 'baz' => 1}) + # e.inspect # => "" + # + # Shows an ellipsis (...), if there are child elements: + # + # e.add_element(REXML::Element.new('bar')) + # e.add_element(REXML::Element.new('baz')) + # e.inspect # => " ... " + # def inspect rv = "<#@expanded_name" @@ -89,60 +370,118 @@ module REXML end end - - # Creates a shallow copy of self. 
- # d = Document.new "" - # new_a = d.root.clone - # puts new_a # => "" + # :call-seq: + # clone -> new_element + # + # Returns a shallow copy of the element, containing the name and attributes, + # but not the parent or children: + # + # e = REXML::Element.new('foo') + # e.add_attributes({'bar' => 0, 'baz' => 1}) + # e.clone # => + # def clone self.class.new self end - # Evaluates to the root node of the document that this element - # belongs to. If this element doesn't belong to a document, but does - # belong to another Element, the parent's root will be returned, until the - # earliest ancestor is found. - # - # Note that this is not the same as the document element. - # In the following example, is the document element, and the root - # node is the parent node of the document element. You may ask yourself - # why the root node is useful: consider the doctype and XML declaration, - # and any processing instructions before the document element... they - # are children of the root node, or siblings of the document element. - # The only time this isn't true is when an Element is created that is - # not part of any Document. In this case, the ancestor that has no - # parent acts as the root node. - # d = Document.new '' - # a = d[1] ; c = a[1][1] - # d.root_node == d # TRUE - # a.root_node # namely, d - # c.root_node # again, d + # :call-seq: + # root_node -> document or element + # + # Returns the most distant ancestor of +self+. + # + # When the element is part of a document, + # returns the root node of the document. + # Note that the root node is different from the document element; + # in this example +a+ is document element and the root node is its parent: + # + # d = REXML::Document.new('') + # top_element = d.first # => ... + # child = top_element.first # => ... 
+ # d.root_node == d # => true + # top_element.root_node == d # => true + # child.root_node == d # => true + # + # When the element is not part of a document, but does have ancestor elements, + # returns the most distant ancestor element: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar') + # e1.parent = e0 + # e2 = REXML::Element.new('baz') + # e2.parent = e1 + # e2.root_node == e0 # => true + # + # When the element has no ancestor elements, + # returns +self+: + # + # e = REXML::Element.new('foo') + # e.root_node == e # => true + # + # Related: #root, #document. + # def root_node parent.nil? ? self : parent.root_node end + # :call-seq: + # root -> element + # + # Returns the most distant _element_ (not document) ancestor of the element: + # + # d = REXML::Document.new('') + # top_element = d.first + # child = top_element.first + # top_element.root == top_element # => true + # child.root == top_element # => true + # + # For a document, returns the topmost element: + # + # d.root == top_element # => true + # + # Related: #root_node, #document. + # def root return elements[1] if self.kind_of? Document return self if parent.kind_of? Document or parent.nil? return parent.root end - # Evaluates to the document to which this element belongs, or nil if this - # element doesn't belong to a document. + # :call-seq: + # document -> document or nil + # + # If the element is part of a document, returns that document: + # + # d = REXML::Document.new('') + # top_element = d.first + # child = top_element.first + # top_element.document == d # => true + # child.document == d # => true + # + # If the element is not part of a document, returns +nil+: + # + # REXML::Element.new.document # => nil + # + # For a document, returns +self+: + # + # d.document == d # => true + # + # Related: #root, #root_node. + # def document rt = root rt.parent if rt end - # Evaluates to +true+ if whitespace is respected for this element. This - # is the case if: - # 1. 
Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value - # 2. The context has :+respect_whitespace+ set to :+all+ or - # an array containing the name of this element, and - # :+compress_whitespace+ isn't set to :+all+ or an array containing the - # name of this element. - # The evaluation is tested against +expanded_name+, and so is namespace - # sensitive. + # :call-seq: + # whitespace + # + # Returns +true+ if whitespace is respected for this element, + # +false+ otherwise. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. + # + # The evaluation is tested against the element's +expanded_name+, + # and so is namespace-sensitive. def whitespace @whitespace = nil if @context @@ -159,6 +498,13 @@ module REXML @whitespace end + # :call-seq: + # ignore_whitespace_nodes + # + # Returns +true+ if whitespace nodes are ignored for the element. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. + # def ignore_whitespace_nodes @ignore_whitespace_nodes = false if @context @@ -170,9 +516,12 @@ module REXML end end - # Evaluates to +true+ if raw mode is set for this element. This - # is the case if the context has :+raw+ set to :+all+ or - # an array containing the name of this element. + # :call-seq: + # raw + # + # Returns +true+ if raw mode is set for the element. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. # # The evaluation is tested against +expanded_name+, and so is namespace # sensitive. @@ -180,7 +529,7 @@ module REXML @raw = (@context and @context[:raw] and (@context[:raw] == :all or @context[:raw].include? expanded_name)) - @raw + @raw end #once :whitespace, :raw, :ignore_whitespace_nodes @@ -189,10 +538,25 @@ module REXML # Namespaces # ################################################# - # Evaluates to an +Array+ containing the prefixes (names) of all defined - # namespaces at this context node. 
- # doc = Document.new("") - # doc.elements['//b'].prefixes # -> ['x', 'y'] + # :call-seq: + # prefixes -> array_of_namespace_prefixes + # + # Returns an array of the string prefixes (names) of all defined namespaces + # in the element and its ancestors: + # + # xml_string = <<-EOT + # + # + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string, {compress_whitespace: :all}) + # d.elements['//a'].prefixes # => ["x", "y"] + # d.elements['//b'].prefixes # => ["x", "y"] + # d.elements['//c'].prefixes # => ["x", "y", "z"] + # def prefixes prefixes = [] prefixes = parent.prefixes if parent @@ -200,6 +564,25 @@ module REXML return prefixes end + # :call-seq: + # namespaces -> hash_of_namespaces + # + # Returns a hash of all defined namespaces + # in the element and its ancestors: + # + # xml_string = <<-EOT + # + # + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # d.elements['//a'].namespaces # => {"x"=>"1", "y"=>"2"} + # d.elements['//b'].namespaces # => {"x"=>"1", "y"=>"2"} + # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} + # def namespaces namespaces = {} namespaces = parent.namespaces if parent @@ -207,19 +590,26 @@ module REXML return namespaces end - # Evaluates to the URI for a prefix, or the empty string if no such - # namespace is declared for this element. Evaluates recursively for - # ancestors. Returns the default namespace, if there is one. - # prefix:: - # the prefix to search for. If not supplied, returns the default - # namespace if one exists - # Returns:: - # the namespace URI as a String, or nil if no such namespace - # exists. If the namespace is undefined, returns an empty string - # doc = Document.new("") - # b = doc.elements['//b'] - # b.namespace # -> '1' - # b.namespace("y") # -> '2' + # :call-seq: + # namespace(prefix = nil) -> string_uri or nil + # + # Returns the string namespace URI for the element, + # possibly deriving from one of its ancestors.
+ # + # xml_string = <<-EOT + # + # + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # b = d.elements['//b'] + # b.namespace # => "1" + # b.namespace('y') # => "2" + # b.namespace('nosuch') # => nil + # def namespace(prefix=nil) if prefix.nil? prefix = prefix() @@ -235,19 +625,24 @@ module REXML return ns end - # Adds a namespace to this element. - # prefix:: - # the prefix string, or the namespace URI if +uri+ is not - # supplied - # uri:: - # the namespace URI. May be nil, in which +prefix+ is used as - # the URI - # Evaluates to: this Element - # a = Element.new("a") - # a.add_namespace("xmlns:foo", "bar" ) - # a.add_namespace("foo", "bar") # shorthand for previous line - # a.add_namespace("twiddle") - # puts a #-> + # :call-seq: + # add_namespace(prefix, uri = nil) -> self + # + # Adds a namespace to the element; returns +self+. + # + # With the single argument +prefix+, + # adds a namespace using the given +prefix+ and the namespace URI: + # + # e = REXML::Element.new('foo') + # e.add_namespace('bar') + # e.namespaces # => {"xmlns"=>"bar"} + # + # With both arguments +prefix+ and +uri+ given, + # adds a namespace using both arguments: + # + # e.add_namespace('baz', 'bat') + # e.namespaces # => {"xmlns"=>"bar", "baz"=>"bat"} + # def add_namespace( prefix, uri=nil ) unless uri @attributes["xmlns"] = prefix @@ -258,16 +653,28 @@ module REXML self end - # Removes a namespace from this node. This only works if the namespace is - # actually declared in this node. If no argument is passed, deletes the - # default namespace. + # :call-seq: + # delete_namespace(namespace = 'xmlns') -> self + # + # Removes a namespace from the element. 
+ # + # With no argument, removes the default namespace: + # + # d = REXML::Document.new "" + # d.to_s # => "" + # d.root.delete_namespace # => + # d.to_s # => "" + # + # With argument +namespace+, removes the specified namespace: + # + # d.root.delete_namespace('foo') + # d.to_s # => "" + # + # Does nothing if no such namespace is found: + # + # d.root.delete_namespace('nosuch') + # d.to_s # => "" # - # Evaluates to: this element - # doc = Document.new "" - # doc.root.delete_namespace - # puts doc # -> - # doc.root.delete_namespace 'foo' - # puts doc # -> def delete_namespace namespace="xmlns" namespace = "xmlns:#{namespace}" unless namespace == 'xmlns' attribute = attributes.get_attribute(namespace) @@ -279,20 +686,40 @@ module REXML # Elements # ################################################# - # Adds a child to this element, optionally setting attributes in - # the element. - # element:: - # optional. If Element, the element is added. - # Otherwise, a new Element is constructed with the argument (see - # Element.initialize). - # attrs:: - # If supplied, must be a Hash containing String name,value - # pairs, which will be used to set the attributes of the new Element. - # Returns:: the Element that was added - # el = doc.add_element 'my-tag' - # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'} - # el = Element.new 'my-tag' - # doc.add_element el + # :call-seq: + # add_element(name, attributes = nil) -> new_element + # add_element(element, attributes = nil) -> element + # + # Adds a child element, optionally setting attributes + # on the added element; returns the added element. 
+ # + # With string argument +name+, creates a new element with that name + # and adds the new element as a child: + # + # e0 = REXML::Element.new('foo') + # e0.add_element('bar') + # e0[0] # => + # + # + # With argument +name+ and hash argument +attributes+, + # sets attributes on the new element: + # + # e0.add_element('baz', {'bat' => '0', 'bam' => '1'}) + # e0[1] # => + # + # With element argument +element+, adds that element as a child: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar') + # e0.add_element(e1) + # e0[0] # => + # + # With argument +element+ and hash argument +attributes+, + # sets attributes on the added element: + # + # e0.add_element(e1, {'bat' => '0', 'bam' => '1'}) + # e0[1] # => + # def add_element element, attrs=nil raise "First argument must be either an element name, or an Element object" if element.nil? el = @elements.add(element) @@ -302,52 +729,112 @@ module REXML el end + # :call-seq: + # delete_element(index) -> removed_element or nil + # delete_element(element) -> removed_element or nil + # delete_element(xpath) -> removed_element or nil + # # Deletes a child element. - # element:: - # Must be an +Element+, +String+, or +Integer+. If Element, - # the element is removed. If String, the element is found (via XPath) - # and removed. This means that any parent can remove any - # descendant. If Integer, the Element indexed by that number will be - # removed. - # Returns:: the element that was removed. - # doc.delete_element "/a/b/c[@id='4']" - # doc.delete_element doc.elements["//k"] - # doc.delete_element 1 + # + # When 1-based integer argument +index+ is given, + # removes and returns the child element at that offset if it exists; + # indexing does not include text nodes; + # returns +nil+ if the element does not exist: + # + # d = REXML::Document.new 'text' + # a = d.root # => ... 
+ # a.delete_element(1) # => + # a.delete_element(1) # => + # a.delete_element(1) # => nil + # + # When element argument +element+ is given, + # removes and returns that child element if it exists, + # otherwise returns +nil+: + # + # d = REXML::Document.new 'text' + # a = d.root # => ... + # c = a[2] # => + # a.delete_element(c) # => + # a.delete_element(c) # => nil + # + # When xpath argument +xpath+ is given, + # removes and returns the element at xpath if it exists, + # otherwise returns +nil+: + # + # d = REXML::Document.new 'text' + # a = d.root # => ... + # a.delete_element('//c') # => + # a.delete_element('//c') # => nil + # def delete_element element @elements.delete element end - # Evaluates to +true+ if this element has at least one child Element - # doc = Document.new "Text" - # doc.root.has_elements # -> true - # doc.elements["/a/b"].has_elements # -> false - # doc.elements["/a/c"].has_elements # -> false + # :call-seq: + # has_elements? + # + # Returns +true+ if the element has one or more element children, + # +false+ otherwise: + # + # d = REXML::Document.new 'text' + # a = d.root # => ... + # a.has_elements? # => true + # b = a[0] # => + # b.has_elements? # => false + # def has_elements? !@elements.empty? end - # Iterates through the child elements, yielding for each Element that - # has a particular attribute set. - # key:: - # the name of the attribute to search for - # value:: - # the value of the attribute - # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. 
- # - # doc = Document.new "" - # # Yields b, c, d - # doc.root.each_element_with_attribute( 'id' ) {|e| p e} - # # Yields b, d - # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e} - # # Yields b - # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e} - # # Yields d - # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e} + # :call-seq: + # each_element_with_attribute(attr_name, value = nil, max = 0, xpath = nil) {|e| ... } + # + # Calls the given block with each child element that meets given criteria. + # + # When only string argument +attr_name+ is given, + # calls the block with each child element that has that attribute: + # + # d = REXML::Document.new '' + # a = d.root + # a.each_element_with_attribute('id') {|e| p e } + # + # Output: + # + # + # + # + # + # With argument +attr_name+ and string argument +value+ given, + # calls the block with each child element that has that attribute + # with that value: + # + # a.each_element_with_attribute('id', '1') {|e| p e } + # + # Output: + # + # + # + # + # With arguments +attr_name+, +value+, and integer argument +max+ given, + # calls the block with at most +max+ child elements: + # + # a.each_element_with_attribute('id', '1', 1) {|e| p e } + # + # Output: + # + # + # + # With all arguments given, including +xpath+, + # calls the block with only those child elements + # that meet the first three criteria, + # and also match the given +xpath+: + # + # a.each_element_with_attribute('id', '1', 2, '//d') {|e| p e } + # + # Output: + # + # + # def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element each_with_something( proc {|child| if value.nil? @@ -358,27 +845,53 @@ module REXML }, max, name, &block ) end - # Iterates through the children, yielding for each Element that - # has a particular text set. - # text:: - # the text to search for. 
If nil, or not supplied, will iterate - # over all +Element+ children that contain at least one +Text+ node. - # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. - # - # doc = Document.new 'bbd' - # # Yields b, c, d - # doc.each_element_with_text {|e|p e} - # # Yields b, c - # doc.each_element_with_text('b'){|e|p e} - # # Yields b - # doc.each_element_with_text('b', 1){|e|p e} - # # Yields d - # doc.each_element_with_text(nil, 0, 'd'){|e|p e} + # :call-seq: + # each_element_with_text(text = nil, max = 0, xpath = nil) {|e| ... } + # + # Calls the given block with each child element that meets given criteria. + # + # With no arguments, calls the block with each child element that has text: + # + # d = REXML::Document.new 'bbd' + # a = d.root + # a.each_element_with_text {|e| p e } + # + # Output: + # + # ... + # ... + # ... + # + # With the single string argument +text+, + # calls the block with each element that has exactly that text: + # + # a.each_element_with_text('b') {|e| p e } + # + # Output: + # + # ... + # ... + # + # With argument +text+ and integer argument +max+, + # calls the block with at most +max+ elements: + # + # a.each_element_with_text('b', 1) {|e| p e } + # + # Output: + # + # ... + # + # With all arguments given, including +xpath+, + # calls the block with only those child elements + # that meet the first two criteria, + # and also match the given +xpath+: + # + # a.each_element_with_text('b', 2, '//c') {|e| p e } + # + # Output: + # + # ... + # def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element each_with_something( proc {|child| if text.nil? @@ -389,35 +902,71 @@ module REXML }, max, name, &block ) end - # Synonym for Element.elements.each + # :call-seq: + # each_element {|e| ... 
} + # + # Calls the given block with each child element: + # + # d = REXML::Document.new 'bbd' + # a = d.root + # a.each_element {|e| p e } + # + # Output: + # + # ... + # ... + # ... + # + # def each_element( xpath=nil, &block ) # :yields: Element @elements.each( xpath, &block ) end - # Synonym for Element.to_a - # This is a little slower than calling elements.each directly. - # xpath:: any XPath by which to search for elements in the tree - # Returns:: an array of Elements that match the supplied path + # :call-seq: + # get_elements(xpath) + # + # Returns an array of the elements that match the given +xpath+: + # + # xml_string = <<-EOT + # + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # d.root.get_elements('//a') # => [ ... , ] + # def get_elements( xpath ) @elements.to_a( xpath ) end - # Returns the next sibling that is an element, or nil if there is - # no Element sibling after this one - # doc = Document.new 'text' - # doc.root.elements['b'].next_element #-> - # doc.root.elements['c'].next_element #-> nil + # :call-seq: + # next_element + # + # Returns the next sibling that is an element if it exists, + # +nil+ otherwise: + # + # d = REXML::Document.new 'text' + # d.root.elements['b'].next_element #-> + # d.root.elements['c'].next_element #-> nil + # def next_element element = next_sibling element = element.next_sibling until element.nil? or element.kind_of?
Element return element end - # Returns the previous sibling that is an element, or nil if there is - # no Element sibling prior to this one - # doc = Document.new 'text' - # doc.root.elements['c'].previous_element #-> - # doc.root.elements['b'].previous_element #-> nil + # :call-seq: + # previous_element + # + # Returns the previous sibling that is an element if it exists, + # +nil+ otherwise: + # + # d = REXML::Document.new 'text' + # d.root.elements['c'].previous_element #-> + # d.root.elements['b'].previous_element #-> nil + # def previous_element element = previous_sibling element = element.previous_sibling until element.nil? or element.kind_of? Element @@ -429,36 +978,69 @@ module REXML # Text # ################################################# - # Evaluates to +true+ if this element has at least one Text child + # :call-seq: + # has_text? -> true or false + # + # Returns +true+ if the element has one or more text nodes, + # +false+ otherwise: + # + # d = REXML::Document.new 'text' + # a = d.root + # a.has_text? # => true + # b = a[0] + # b.has_text? # => false + # def has_text? not text().nil? end - # A convenience method which returns the String value of the _first_ - # child text element, if one exists, and +nil+ otherwise. + # :call-seq: + # text(xpath = nil) -> text_string or nil + # + # Returns the text string from the first text node child + # in a specified element, if it exists, +nil+ otherwise. # - # Note that an element may have multiple Text elements, perhaps - # separated by other children. Be aware that this method only returns - # the first Text node. + # With no argument, returns the text from the first text node in +self+: # - # This method returns the +value+ of the first text child node, which - # ignores the +raw+ setting, so always returns normalized text. See - # the Text::value documentation. + # d = REXML::Document.new "

some text this is bold! more text

" + # d.root.text.class # => String + # d.root.text # => "some text " + # + # With argument +xpath+, returns text from the first text node + # in the element that matches +xpath+: + # + # d.root.text(1) # => "this is bold!" + # + # Note that an element may have multiple text nodes, + # possibly separated by other non-text children, as above. + # Even so, the returned value is the string text from the first such node. + # + # Note also that the text node is retrieved by method get_text, + # and so is always normalized text. # - # doc = Document.new "

some text this is bold! more text

" - # # The element 'p' has two text elements, "some text " and " more text". - # doc.root.text #-> "some text " def text( path = nil ) rv = get_text(path) return rv.value unless rv.nil? nil end - # Returns the first child Text node, if any, or +nil+ otherwise. - # This method returns the actual +Text+ node, rather than the String content. - # doc = Document.new "

some text this is bold! more text

" - # # The element 'p' has two text elements, "some text " and " more text". - # doc.root.get_text.value #-> "some text " + # :call-seq: + # get_text(xpath = nil) -> text_node or nil + # + # Returns the first text node child in a specified element, if it exists, + # +nil+ otherwise. + # + # With no argument, returns the first text node from +self+: + # + # d = REXML::Document.new "

some text this is bold! more text

" + # d.root.get_text.class # => REXML::Text + # d.root.get_text # => "some text " + # + # With argument +xpath+, returns the first text node from the element + # that matches +xpath+: + # + # d.root.get_text(1) # => "this is bold!" + # def get_text path = nil rv = nil if path @@ -470,26 +1052,31 @@ module REXML return rv end - # Sets the first Text child of this object. See text() for a - # discussion about Text children. - # - # If a Text child already exists, the child is replaced by this - # content. This means that Text content can be deleted by calling - # this method with a nil argument. In this case, the next Text - # child becomes the first Text child. In no case is the order of - # any siblings disturbed. - # text:: - # If a String, a new Text child is created and added to - # this Element as the first Text child. If Text, the text is set - # as the first Child element. If nil, then any existing first Text - # child is removed. - # Returns:: this Element. - # doc = Document.new '' - # doc.root.text = 'Sean' #-> 'Sean' - # doc.root.text = 'Elliott' #-> 'Elliott' - # doc.root.add_element 'c' #-> 'Elliott' - # doc.root.text = 'Russell' #-> 'Russell' - # doc.root.text = nil #-> '' + # :call-seq: + # text = string -> string + # text = nil -> nil + # + # Adds, replaces, or removes the first text node child in the element. + # + # With string argument +string+, + # creates a new \REXML::Text node containing that string, + # honoring the current settings for whitespace and raw, + # then places the node as the first text child in the element; + # returns +string+. + # + # If the element has no text child, the text node is added: + # + # d = REXML::Document.new '' + # d.root.text = 'foo' #-> 'foo' + # + # If the element has a text child, it is replaced: + # + # d.root.text = 'bar' #-> 'bar' + # + # With argument +nil+, removes the first text child: + # + # d.root.text = nil #-> '' + # def text=( text ) if text.kind_of?
String text = Text.new( text, whitespace(), nil, raw() ) @@ -509,17 +1096,45 @@ module REXML return self end - # A helper method to add a Text child. Actual Text instances can - # be added with regular Parent methods, such as add() and <<() - # text:: - # if a String, a new Text instance is created and added - # to the parent. If Text, the object is added directly. - # Returns:: this Element - # e = Element.new('a') #-> - # e.add_text 'foo' #-> foo - # e.add_text Text.new(' bar') #-> foo bar - # Note that at the end of this example, the branch has 3 nodes; the 'e' - # element and 2 Text node children. + # :call-seq: + # add_text(string) -> nil + # add_text(text_node) -> self + # + # Adds text to the element. + # + # When string argument +string+ is given, returns +nil+. + # + # If the element has no child text node, + # creates a \REXML::Text object using the string, + # honoring the current settings for whitespace and raw, + # then adds that node to the element: + # + # d = REXML::Document.new('') + # a = d.root + # a.add_text('foo') + # a.to_a # => [, "foo"] + # + # If the element has child text nodes, + # appends the string to the _last_ text node: + # + # d = REXML::Document.new('foobar') + # a = d.root + # a.add_text('baz') + # a.to_a # => ["foo", , "barbaz"] + # a.add_text('baz') + # a.to_a # => ["foo", , "barbazbaz"] + # + # When text node argument +text_node+ is given, + # appends the node as the last text node in the element; + # returns +self+: + # + # d = REXML::Document.new('foobar') + # a = d.root + # a.add_text(REXML::Text.new('baz')) + # a.to_a # => ["foo", , "bar", "baz"] + # a.add_text(REXML::Text.new('baz')) + # a.to_a # => ["foo", , "bar", "baz", "baz"] + # def add_text( text ) if text.kind_of? String if @children[-1].kind_of? 
Text @@ -532,10 +1147,39 @@ module REXML return self end + # :call-seq: + # node_type -> :element + # + # Returns symbol :element: + # + # d = REXML::Document.new('') + # a = d.root # => + # a.node_type # => :element + # def node_type :element end + # :call-seq: + # xpath -> string_xpath + # + # Returns the string xpath to the element + # relative to the most distant parent: + # + # d = REXML::Document.new('') + # a = d.root # => ... + # b = a[0] # => ... + # c = b[0] # => + # d.xpath # => "" + # a.xpath # => "/a" + # b.xpath # => "/a/b" + # c.xpath # => "/a/b/c" + # + # If there is no parent, returns the expanded name of the element: + # + # e = REXML::Element.new('foo') + # e.xpath # => "foo" + # def xpath path_elements = [] cur = self @@ -551,19 +1195,45 @@ module REXML # Attributes # ################################################# - # Fetches an attribute value or a child. + # :call-seq: + # [index] -> object + # [attr_name] -> attr_value + # [attr_sym] -> attr_value + # + # With integer argument +index+ given, + # returns the child at offset +index+, or +nil+ if none: + # + # d = REXML::Document.new '>textmore
' + # root = d.root + # (0..root.size).each do |index| + # node = root[index] + # p "#{index}: #{node} (#{node.class})" + # end + # + # Output: + # + # "0: (REXML::Element)" + # "1: text (REXML::Text)" + # "2: (REXML::Element)" + # "3: more (REXML::Text)" + # "4: (REXML::Element)" + # "5: (NilClass)" + # + # With string argument +attr_name+ given, + # returns the string value for the given attribute name if it exists, + # otherwise +nil+: + # + # d = REXML::Document.new('') + # root = d.root + # root['attr'] # => "value" + # root['nosuch'] # => nil # - # If String or Symbol is specified, it's treated as attribute - # name. Attribute value as String or +nil+ is returned. This case - # is shortcut of +attributes[name]+. + # With symbol argument +attr_sym+ given, + # returns [attr_sym.to_s]: # - # If Integer is specified, it's treated as the index of - # child. It returns Nth child. + # root[:attr] # => "value" + # root[:nosuch] # => nil # - # doc = REXML::Document.new("") - # doc.root["attr"] # => "1" - # doc.root.attributes["attr"] # => "1" - # doc.root[1] # => def [](name_or_index) case name_or_index when String @@ -575,17 +1245,42 @@ module REXML end end + + # :call-seq: + # attribute(name, namespace = nil) + # + # Returns the string value for the given attribute name. + # + # With only argument +name+ given, + # returns the value of the named attribute if it exists, otherwise +nil+: + # + # xml_string = <<-EOT + # + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # root = d.root + # a = root[1] # => + # a.attribute('attr') # => attr='value' + # a.attribute('nope') # => nil + # + # With arguments +name+ and +namespace+ given, + # returns the value of the named attribute if it exists, otherwise +nil+: + # + # xml_string = "" + # document = REXML::Document.new(xml_string) + # document.root.attribute("x") # => x='x' + # document.root.attribute("x", "a") # => a:x='a:x' + # def attribute( name, namespace=nil ) - prefix = nil - if namespaces.respond_to? 
:key - prefix = namespaces.key(namespace) if namespace - else - prefix = namespaces.index(namespace) if namespace - end + prefix = namespaces.key(namespace) if namespace prefix = nil if prefix == 'xmlns' ret_val = - attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) + attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name ) return ret_val unless ret_val.nil? return nil if prefix.nil? @@ -598,29 +1293,46 @@ module REXML end - # Evaluates to +true+ if this element has any attributes set, false - # otherwise. + # :call-seq: + # has_attributes? -> true or false + # + # Returns +true+ if the element has attributes, +false+ otherwise: + # + # d = REXML::Document.new('
') + # a, b = *d.root + # a.has_attributes? # => true + # b.has_attributes? # => false + # def has_attributes? return !@attributes.empty? end + # :call-seq: + # add_attribute(name, value) -> value + # add_attribute(attribute) -> attribute + # # Adds an attribute to this element, overwriting any existing attribute # by the same name. - # key:: - # can be either an Attribute or a String. If an Attribute, - # the attribute is added to the list of Element attributes. If String, - # the argument is used as the name of the new attribute, and the value - # parameter must be supplied. - # value:: - # Required if +key+ is a String, and ignored if the first argument is - # an Attribute. This is a String, and is used as the value - # of the new Attribute. This should be the unnormalized value of the - # attribute (without entities). - # Returns:: the Attribute added - # e = Element.new 'e' - # e.add_attribute( 'a', 'b' ) #-> - # e.add_attribute( 'x:a', 'c' ) #-> - # e.add_attribute Attribute.new('b', 'd') #-> + # + # With string argument +name+ and object +value+ are given, + # adds the attribute created with that name and value: + # + # e = REXML::Element.new + # e.add_attribute('attr', 'value') # => "value" + # e['attr'] # => "value" + # e.add_attribute('attr', 'VALUE') # => "VALUE" + # e['attr'] # => "VALUE" + # + # With only attribute object +attribute+ given, + # adds the given attribute: + # + # a = REXML::Attribute.new('attr', 'value') + # e.add_attribute(a) # => attr='value' + # e['attr'] # => "value" + # a = REXML::Attribute.new('attr', 'VALUE') + # e.add_attribute(a) # => attr='VALUE' + # e['attr'] # => "VALUE" + # def add_attribute( key, value=nil ) if key.kind_of? Attribute @attributes << key @@ -629,10 +1341,29 @@ module REXML end end - # Add multiple attributes to this element. 
- # hash:: is either a hash, or array of arrays - # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} ) - # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] ) + # :call-seq: + # add_attributes(hash) -> hash + # add_attributes(array) + # + # Adds zero or more attributes to the element; + # returns the argument. + # + # If hash argument +hash+ is given, + # each key must be a string; + # adds each attribute created with the key/value pair: + # + # e = REXML::Element.new + # h = {'foo' => 'bar', 'baz' => 'bat'} + # e.add_attributes(h) + # + # If argument +array+ is given, + # each array member must be a 2-element array [name, value]; + # each name must be a string: + # + # e = REXML::Element.new + # a = [['foo', 'bar'], ['baz', 'bat']] + # e.add_attributes(a) + # def add_attributes hash if hash.kind_of? Hash hash.each_pair {|key, value| @attributes[key] = value } @@ -641,19 +1372,17 @@ module REXML end end - # Removes an attribute - # key:: - # either an Attribute or a String. In either case, the - # attribute is found by matching the attribute name to the argument, - # and then removed. If no attribute is found, no action is taken. - # Returns:: - # the attribute removed, or nil if this Element did not contain - # a matching attribute - # e = Element.new('E') - # e.add_attribute( 'name', 'Sean' ) #-> - # r = e.add_attribute( 'sur:name', 'Russell' ) #-> - # e.delete_attribute( 'name' ) #-> - # e.delete_attribute( r ) #-> + # :call-seq: + # delete_attribute(name) -> removed_attribute or nil + # + # Removes a named attribute if it exists; + # returns the removed attribute if found, otherwise +nil+: + # + # e = REXML::Element.new('foo') + # e.add_attribute('bar', 'baz') + # e.delete_attribute('bar') # => + # e.delete_attribute('bar') # => nil + # def delete_attribute(key) attr = @attributes.get_attribute(key) attr.remove unless attr.nil?
@@ -663,26 +1392,80 @@ module REXML # Other Utilities # ################################################# - # Get an array of all CData children. - # IMMUTABLE + # :call-seq: + # cdatas -> array_of_cdata_children + # + # Returns a frozen array of the REXML::CData children of the element: + # + # xml_string = <<-EOT + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # cds = d.root.cdatas # => ["foo", "bar"] + # cds.frozen? # => true + # cds.map {|cd| cd.class } # => [REXML::CData, REXML::CData] + # def cdatas find_all { |child| child.kind_of? CData }.freeze end - # Get an array of all Comment children. - # IMMUTABLE + # :call-seq: + # comments -> array_of_comment_children + # + # Returns a frozen array of the REXML::Comment children of the element: + # + # xml_string = <<-EOT + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # cs = d.root.comments + # cs.frozen? # => true + # cs.map {|c| c.class } # => [REXML::Comment, REXML::Comment] + # cs.map {|c| c.to_s } # => ["foo", "bar"] + # def comments find_all { |child| child.kind_of? Comment }.freeze end - # Get an array of all Instruction children. - # IMMUTABLE + # :call-seq: + # instructions -> array_of_instruction_children + # + # Returns a frozen array of the REXML::Instruction children of the element: + # + # xml_string = <<-EOT + # + # + # + # + # EOT + # d = REXML::Document.new(xml_string) + # is = d.root.instructions + # is.frozen? # => true + # is.map {|i| i.class } # => [REXML::Instruction, REXML::Instruction] + # is.map {|i| i.to_s } # => ["", ""] + # def instructions find_all { |child| child.kind_of? Instruction }.freeze end - # Get an array of all Text children. - # IMMUTABLE + # :call-seq: + # texts -> array_of_text_children + # + # Returns a frozen array of the REXML::Text children of the element: + # + # xml_string = 'textmore' + # d = REXML::Document.new(xml_string) + # ts = d.root.texts + # ts.frozen? 
# => true + # ts.map {|t| t.class } # => [REXML::Text, REXML::Text] + # ts.map {|t| t.to_s } # => ["text", "more"] + # def texts find_all { |child| child.kind_of? Text }.freeze end @@ -713,7 +1496,7 @@ module REXML Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1) formatter = if indent > -1 if transitive - require "rexml/formatters/transitive" + require_relative "formatters/transitive" REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) @@ -758,35 +1541,129 @@ module REXML # XPath search support. You are expected to only encounter this class as # the element.elements object. Therefore, you are # _not_ expected to instantiate this yourself. + # + # xml_string = <<-EOT + # + # + # + # Everyday Italian + # Giada De Laurentiis + # 2005 + # 30.00 + # + # + # Harry Potter + # J K. Rowling + # 2005 + # 29.99 + # + # + # XQuery Kick Start + # James McGovern + # Per Bothner + # Kurt Cagle + # James Linn + # Vaidyanathan Nagarajan + # 2003 + # 49.99 + # + # + # Learning XML + # Erik T. Ray + # 2003 + # 39.95 + # + # + # EOT + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements # => # ... > + # class Elements include Enumerable - # Constructor - # parent:: the parent Element + # :call-seq: + # new(parent) -> new_elements_object + # + # Returns a new \Elements object with the given +parent+. + # Does _not_ assign parent.elements = self: + # + # d = REXML::Document.new(xml_string) + # eles = REXML::Elements.new(d.root) + # eles # => # ... > + # eles == d.root.elements # => false + # def initialize parent @element = parent end - # Fetches a child element. Filters only Element children, regardless of - # the XPath match. - # index:: - # the search parameter. This is either an Integer, which - # will be used to find the index'th child Element, or an XPath, - # which will be used to search for the Element. 
Because - # of the nature of XPath searches, any element in the connected XML - # document can be fetched through any other element. The - # Integer index is 1-based, not 0-based. This means that the first - # child element is at index 1, not 0, and the +n+th element is at index - # +n+, not n-1. This is because XPath indexes element children - # starting from 1, not 0, and the indexes should be the same. - # name:: - # optional, and only used in the first argument is an - # Integer. In that case, the index'th child Element that has the - # supplied name will be returned. Note again that the indexes start at 1. - # Returns:: the first matching Element, or nil if no child matched - # doc = Document.new '' - # doc.root.elements[1] #-> - # doc.root.elements['c'] #-> - # doc.root.elements[2,'c'] #-> + # :call-seq: + # parent + # + # Returns the parent element cited in creating the \Elements object. + # This element is also the default starting point for searching + # in the \Elements object. + # + # d = REXML::Document.new(xml_string) + # elements = REXML::Elements.new(d.root) + # elements.parent == d.root # => true + # + def parent + @element + end + + # :call-seq: + # elements[index] -> element or nil + # elements[xpath] -> element or nil + # elements[n, name] -> element or nil + # + # Returns the first \Element object selected by the arguments, + # if any found, or +nil+ if none found. + # + # Notes: + # - The +index+ is 1-based, not 0-based, so that: + # - The first element has index 1 + # - The _nth_ element has index +n+. + # - The selection ignores non-\Element nodes. + # + # When the single argument +index+ is given, + # returns the element given by the index, if any; otherwise, +nil+: + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements + # eles # => # ... > + # eles[1] # => ... + # eles.size # => 4 + # eles[4] # => ... 
+ # eles[5] # => nil + # + # The node at this index is not an \Element, and so is not returned: + # + # eles = d.root.first.first # => ... </> + # eles.to_a # => ["Everyday Italian"] + # eles[1] # => nil + # + # When the single argument +xpath+ is given, + # returns the first element found via that +xpath+, if any; otherwise, +nil+: + # + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles['/bookstore'] # => <bookstore> ... </> + # eles['//book'] # => <book category='cooking'> ... </> + # eles['//book [@category="children"]'] # => <book category='children'> ... </> + # eles['/nosuch'] # => nil + # eles['//nosuch'] # => nil + # eles['//book [@category="nosuch"]'] # => nil + # eles['.'] # => <bookstore> ... </> + # eles['..'].class # => REXML::Document + # + # With arguments +n+ and +name+ given, + # returns the _nth_ found element that has the given +name+, + # or +nil+ if there is no such _nth_ element: + # + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles[1, 'book'] # => <book category='cooking'> ... </> + # eles[4, 'book'] # => <book category='web' cover='paperback'> ... </> + # eles[5, 'book'] # => nil + # def []( index, name=nil) if index.kind_of? Integer raise "index (#{index}) must be >= 1" if index < 1 @@ -806,19 +1683,42 @@ module REXML end end - # Sets an element, replacing any previous matching element. If no - # existing element is found ,the element is added. - # index:: Used to find a matching element to replace. See [](). - # element:: - # The element to replace the existing element with - # the previous element - # Returns:: nil if no previous element was found. + # :call-seq: + # elements[] = index, replacement_element -> replacement_element or nil + # + # Replaces or adds an element. 
+ # + # When <tt>eles[index]</tt> exists, replaces it with +replacement_element+ + # and returns +replacement_element+: + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles[1] # => <book category='cooking'> ... </> + # eles[1] = REXML::Element.new('foo') + # eles[1] # => <foo/> + # + # Does nothing (or raises an exception) + # if +replacement_element+ is not an \Element: + # eles[2] # => <book category='web' cover='paperback'> ... </> + # eles[2] = REXML::Text.new('bar') + # eles[2] # => <book category='web' cover='paperback'> ... </> + # + # When <tt>eles[index]</tt> does not exist, + # adds +replacement_element+ to the element and returns + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles.size # => 4 + # eles[50] = REXML::Element.new('foo') # => <foo/> + # eles.size # => 5 + # eles[5] # => <foo/> + # + # Does nothing (or raises an exception) + # if +replacement_element+ is not an \Element: + # + # eles[50] = REXML::Text.new('bar') # => "bar" + # eles.size # => 5 # - # doc = Document.new '<a/>' - # doc.root.elements[10] = Element.new('b') #-> <a><b/></a> - # doc.root.elements[1] #-> <b/> - # doc.root.elements[1] = Element.new('c') #-> <a><c/></a> - # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a> def []=( index, element ) previous = self[index] if previous.nil? @@ -829,14 +1729,34 @@ module REXML return previous end - # Returns +true+ if there are no +Element+ children, +false+ otherwise + # :call-seq: + # empty? -> true or false + # + # Returns +true+ if there are no \Element children, +false+ otherwise. + # + # d = REXML::Document.new('') + # d.elements.empty? # => true + # d = REXML::Document.new(xml_string) + # d.elements.empty? # => false + # def empty? @element.find{ |child| child.kind_of? Element}.nil? 
end - # Returns the index of the supplied child (starting at 1), or -1 if - # the element is not a child - # element:: an +Element+ child + # :call-seq: + # index(element) + # + # Returns the 1-based index of the given +element+, if found; + # otherwise, returns -1: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # ele_1, ele_2, ele_3, ele_4 = *elements + # elements.index(ele_4) # => 4 + # elements.delete(ele_3) + # elements.index(ele_4) # => 3 + # elements.index(ele_3) # => -1 + # def index element rv = 0 found = @element.find do |child| @@ -848,17 +1768,47 @@ module REXML return -1 end - # Deletes a child Element - # element:: - # Either an Element, which is removed directly; an - # xpath, where the first matching child is removed; or an Integer, - # where the n'th Element is removed. - # Returns:: the removed child - # doc = Document.new '<a><b/><c/><c id="1"/></a>' - # b = doc.root.elements[1] - # doc.root.elements.delete b #-> <a><c/><c id="1"/></a> - # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a> - # doc.root.elements.delete 1 #-> <a/> + # :call-seq: + # delete(index) -> removed_element or nil + # delete(element) -> removed_element or nil + # delete(xpath) -> removed_element or nil + # + # Removes an element; returns the removed element, or +nil+ if none removed. + # + # With integer argument +index+ given, + # removes the child element at that offset: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # elements[2] # => <book category='children'> ... </> + # elements.delete(2) # => <book category='children'> ... </> + # elements.size # => 3 + # elements[2] # => <book category='web'> ... 
</> + # elements.delete(50) # => nil + # + # With element argument +element+ given, + # removes that child element: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # ele_1, ele_2, ele_3, ele_4 = *elements + # elements.size # => 4 + # elements[2] # => <book category='children'> ... </> + # elements.delete(ele_2) # => <book category='children'> ... </> + # elements.size # => 3 + # elements[2] # => <book category='web'> ... </> + # elements.delete(ele_2) # => nil + # + # With string argument +xpath+ given, + # removes the first element found via that xpath: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.delete('//book') # => <book category='cooking'> ... </> + # elements.delete('//book [@category="children"]') # => <book category='children'> ... </> + # elements.delete('//nosuch') # => nil + # def delete element if element.kind_of? Element @element.delete element @@ -868,12 +1818,23 @@ module REXML end end - # Removes multiple elements. Filters for Element children, regardless of - # XPath matching. - # xpath:: all elements matching this String path are removed. - # Returns:: an Array of Elements that have been removed - # doc = Document.new '<a><c/><c/><c/><c/></a>' - # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>] + # :call-seq: + # delete_all(xpath) + # + # Removes all elements found via the given +xpath+; + # returns the array of removed elements (empty if none were removed). 
+ # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # deleted_elements = elements.delete_all('//book [@category="web"]') + # deleted_elements.size # => 2 + # elements.size # => 2 + # deleted_elements = elements.delete_all('//book') + # deleted_elements.size # => 2 + # elements.size # => 0 + # elements.delete_all('//book') # => [] + # def delete_all( xpath ) rv = [] XPath::each( @element, xpath) {|element| @@ -886,15 +1847,68 @@ module REXML return rv end - # Adds an element - # element:: - # if supplied, is either an Element, String, or - # Source (see Element.initialize). If not supplied or nil, a - # new, default Element will be constructed - # Returns:: the added Element - # a = Element.new('a') - # a.elements.add(Element.new('b')) #-> <a><b/></a> - # a.elements.add('c') #-> <a><b/><c/></a> + # :call-seq: + # add -> new_element + # add(name) -> new_element + # add(element) -> element + # + # Adds an element; returns the element added. + # + # With no argument, creates and adds a new element. + # The new element has: + # + # - No name. + # - \Parent from the \Elements object. + # - Context from that parent. + # + # Example: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # parent = elements.parent # => <bookstore> ... </> + # parent.context = {raw: :all} + # elements.size # => 4 + # new_element = elements.add # => </> + # elements.size # => 5 + # new_element.name # => nil + # new_element.parent # => <bookstore> ... </> + # new_element.context # => {:raw=>:all} + # + # With string argument +name+, creates and adds a new element. + # The new element has: + # + # - Name +name+. + # - \Parent from the \Elements object. + # - Context from that parent. + # + # Example: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # parent = elements.parent # => <bookstore> ... 
</> + # parent.context = {raw: :all} + # elements.size # => 4 + # new_element = elements.add('foo') # => <foo/> + # elements.size # => 5 + # new_element.name # => "foo" + # new_element.parent # => <bookstore> ... </> + # new_element.context # => {:raw=>:all} + # + # With argument +element+, + # creates and adds a clone of the given +element+. + # The new element has name, parent, and context from the given +element+. + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar', e0, {raw: :all}) + # element = elements.add(e1) # => <bar/> + # elements.size # => 5 + # element.name # => "bar" + # element.parent # => <bookstore> ... </> + # element.context # => {:raw=>:all} + # def add element=nil if element.nil? Element.new("", self, @element.context) @@ -909,24 +1923,55 @@ module REXML alias :<< :add - # Iterates through all of the child Elements, optionally filtering - # them by a given XPath - # xpath:: - # optional. If supplied, this is a String XPath, and is used to - # filter the children, so that only matching children are yielded. Note - # that XPaths are automatically filtered for Elements, so that - # non-Element children will not be yielded - # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>' - # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements - # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements - # doc.root.elements.each('child::node()') {|e|p e} - # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/> - # XPath.each(doc.root, 'child::node()', &block) - # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/> + # :call-seq: + # each(xpath = nil) {|element| ... } -> self + # + # Iterates over the elements. 
+ # + # With no argument, calls the block with each element: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.each {|element| p element } + # + # Output: + # + # <book category='cooking'> ... </> + # <book category='children'> ... </> + # <book category='web'> ... </> + # <book category='web' cover='paperback'> ... </> + # + # With argument +xpath+, calls the block with each element + # that matches the given +xpath+: + # + # elements.each('//book [@category="web"]') {|element| p element } + # + # Output: + # + # <book category='web'> ... </> + # <book category='web' cover='paperback'> ... </> + # def each( xpath=nil ) XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element } end + # :call-seq: + # collect(xpath = nil) {|element| ... } -> array + # + # Iterates over the elements; returns the array of block return values. + # + # With no argument, iterates over all elements: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.collect {|element| element.size } # => [9, 9, 17, 9] + # + # With argument +xpath+, iterates over elements that match + # the given +xpath+: + # + # xpath = '//book [@category="web"]' + # elements.collect(xpath) {|element| element.size } # => [17, 9] + # def collect( xpath=nil ) collection = [] XPath::each( @element, xpath ) {|e| @@ -935,6 +1980,83 @@ module REXML collection end + # :call-seq: + # inject(xpath = nil, initial = nil) -> object + # + # Calls the block with elements; returns the last block return value. + # + # With no argument, iterates over the elements, calling the block + # <tt>elements.size - 1</tt> times. + # + # - The first call passes the first and second elements. + # - The second call passes the first block return value and the third element. + # - The third call passes the second block return value and the fourth element. + # - And so on. 
+ # + # In this example, the block returns the passed element, + # which is then the object argument to the next call: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.inject do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [1, 2] + # [2, 3] + # [3, 4] + # + # With the single argument +xpath+, calls the block only with + # elements matching that xpath: + # + # elements.inject('//book [@category="web"]') do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [3, 4] + # + # With argument +xpath+ given as +nil+ + # and argument +initial+ also given, + # calls the block once for each element. + # + # - The first call passes the +initial+ and the first element. + # - The second call passes the first block return value and the second element. + # - The third call passes the second block return value and the third element. + # - And so on. + # + # In this example, the first object index is <tt>-1</tt> + # + # elements.inject(nil, 'Initial') do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [-1, 1] + # [1, 2] + # [2, 3] + # [3, 4] + # + # In this form the passed object can be used as an accumulator: + # + # elements.inject(nil, 0) do |total, element| + # total += element.size + # end # => 44 + # + # With both arguments +xpath+ and +initial+ are given, + # calls the block only with elements matching that xpath: + # + # elements.inject('//book [@category="web"]', 0) do |total, element| + # total += element.size + # end # => 26 + # def inject( xpath=nil, initial=nil ) first = true XPath::each( @element, xpath ) {|e| @@ -950,23 +2072,39 @@ module REXML initial end - # Returns the number of +Element+ children of the parent object. 
- # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' - # doc.root.size #-> 6, 3 element and 3 text nodes - # doc.root.elements.size #-> 3 + # :call-seq: + # size -> integer + # + # Returns the count of \Element children: + # + # d = REXML::Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' + # d.root.elements.size # => 3 # Three elements. + # d.root.size # => 6 # Three elements plus three text nodes.. + # def size count = 0 @element.each {|child| count+=1 if child.kind_of? Element } count end - # Returns an Array of Element children. An XPath may be supplied to - # filter the children. Only Element children are returned, even if the - # supplied XPath matches non-Element children. - # doc = Document.new '<a>sean<b/>elliott<c/></a>' - # doc.root.elements.to_a #-> [ <b/>, <c/> ] - # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ] - # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ] + # :call-seq: + # to_a(xpath = nil) -> array_of_elements + # + # Returns an array of element children (not including non-element children). + # + # With no argument, returns an array of all element children: + # + # d = REXML::Document.new '<a>sean<b/>elliott<c/></a>' + # elements = d.root.elements + # elements.to_a # => [<b/>, <c/>] # Omits non-element children. + # children = d.root.children + # children # => ["sean", <b/>, "elliott", <c/>] # Includes non-element children. + # + # With argument +xpath+, returns an array of element children + # that match the xpath: + # + # elements.to_a('//c') # => [<c/>] + # def to_a( xpath=nil ) rv = XPath.match( @element, xpath ) return rv.find_all{|e| e.kind_of? Element} if xpath @@ -988,36 +2126,89 @@ module REXML # A class that defines the set of Attributes of an Element and provides # operations for accessing elements in that set. 
class Attributes < Hash - # Constructor - # element:: the Element of which this is an Attribute + + # :call-seq: + # new(element) + # + # Creates and returns a new \REXML::Attributes object. + # The element given by argument +element+ is stored, + # but its own attributes are not modified: + # + # ele = REXML::Element.new('foo') + # attrs = REXML::Attributes.new(ele) + # attrs.object_id == ele.attributes.object_id # => false + # + # Other instance methods in class \REXML::Attributes may refer to: + # + # - +element.document+. + # - +element.prefix+. + # - +element.expanded_name+. + # def initialize element @element = element end - # Fetches an attribute value. If you want to get the Attribute itself, - # use get_attribute() - # name:: an XPath attribute name. Namespaces are relevant here. - # Returns:: - # the String value of the matching attribute, or +nil+ if no - # matching attribute was found. This is the unnormalized value - # (with entities expanded). + # :call-seq: + # [name] -> attribute_value or nil + # + # Returns the value for the attribute given by +name+, + # if it exists; otherwise +nil+. + # The value returned is the unnormalized attribute value, + # with entities expanded: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes['att'] # => "<" + # ele.attributes['bar:att'] # => "2" + # ele.attributes['nosuch'] # => nil + # + # Related: get_attribute (returns an \Attribute object). # - # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>" - # doc.root.attributes['att'] #-> '<' - # doc.root.attributes['bar:att'] #-> '2' def [](name) attr = get_attribute(name) return attr.value unless attr.nil? 
return nil end + # :call-seq: + # to_a -> array_of_attribute_objects + # + # Returns an array of \REXML::Attribute objects representing + # the attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes.to_a # => [foo:att='1', bar:att='2', att='<'] + # attrs.first.class # => REXML::Attribute + # def to_a enum_for(:each_attribute).to_a end - # Returns the number of attributes the owning Element contains. - # doc = Document "<a x='1' y='2' foo:x='3'/>" - # doc.root.attributes.length #-> 3 + # :call-seq: + # length + # + # Returns the count of attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.length # => 3 + # def length c = 0 each_attribute { c+=1 } @@ -1025,14 +2216,30 @@ module REXML end alias :size :length - # Iterates over the attributes of an Element. Yields actual Attribute - # nodes, not String values. + # :call-seq: + # each_attribute {|attr| ... 
} + # + # Calls the given block with each \REXML::Attribute object: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.each_attribute do |attr| + # p [attr.class, attr] + # end + # + # Output: + # + # [REXML::Attribute, foo:att='1'] + # [REXML::Attribute, bar:att='2'] + # [REXML::Attribute, att='<'] # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each_attribute {|attr| - # p attr.expanded_name+" => "+attr.value - # } def each_attribute # :yields: attribute + return to_enum(__method__) unless block_given? each_value do |val| if val.kind_of? Attribute yield val @@ -1042,26 +2249,54 @@ module REXML end end - # Iterates over each attribute of an Element, yielding the expanded name - # and value as a pair of Strings. + # :call-seq: + # each {|expanded_name, value| ... } + # + # Calls the given block with each expanded-name/value pair: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.each do |expanded_name, value| + # p [expanded_name, value] + # end + # + # Output: + # + # ["foo:att", "1"] + # ["bar:att", "2"] + # ["att", "<"] # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each {|name, value| p name+" => "+value } def each + return to_enum(__method__) unless block_given? each_attribute do |attr| yield [attr.expanded_name, attr.value] end end - # Fetches an attribute - # name:: - # the name by which to search for the attribute. Can be a - # <tt>prefix:name</tt> namespace name. - # Returns:: The first matching attribute, or nil if there was none. 
This - # value is an Attribute node, not the String value of the attribute. - # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>' - # doc.root.attributes.get_attribute("foo").value #-> "2" - # doc.root.attributes.get_attribute("x:foo").value #-> "1" + # :call-seq: + # get_attribute(name) -> attribute_object or nil + # + # Returns the \REXML::Attribute object for the given +name+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.get_attribute('foo:att') # => foo:att='1' + # attrs.get_attribute('foo:att').class # => REXML::Attribute + # attrs.get_attribute('bar:att') # => bar:att='2' + # attrs.get_attribute('att') # => att='<' + # attrs.get_attribute('nosuch') # => nil + # def get_attribute( name ) attr = fetch( name, nil ) if attr.nil? @@ -1095,18 +2330,29 @@ module REXML return attr end - # Sets an attribute, overwriting any existing attribute value by the - # same name. Namespace is significant. - # name:: the name of the attribute - # value:: - # (optional) If supplied, the value of the attribute. If - # nil, any existing matching attribute is deleted. 
- # Returns:: - # Owning element - # doc = Document.new "<a x:foo='1' foo='3'/>" - # doc.root.attributes['y:foo'] = '2' - # doc.root.attributes['foo'] = '4' - # doc.root.attributes['x:foo'] = nil + # :call-seq: + # [name] = value -> value + # + # When +value+ is non-+nil+, + # assigns that to the attribute for the given +name+, + # overwriting the previous value if it exists: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs['foo:att'] = '2' # => "2" + # attrs['baz:att'] = '3' # => "3" + # + # When +value+ is +nil+, deletes the attribute if it exists: + # + # attrs['baz:att'] = nil + # attrs.include?('baz:att') # => false + # def []=( name, value ) if value.nil? # Delete the named attribute attr = get_attribute(name) @@ -1130,28 +2376,35 @@ module REXML old_attr[value.prefix] = value elsif old_attr.prefix != value.prefix # Check for conflicting namespaces - raise ParseException.new( - "Namespace conflict in adding attribute \"#{value.name}\": "+ - "Prefix \"#{old_attr.prefix}\" = "+ - "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+ - "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if - value.prefix != "xmlns" and old_attr.prefix != "xmlns" and - @element.namespace( old_attr.prefix ) == - @element.namespace( value.prefix ) - store value.name, { old_attr.prefix => old_attr, - value.prefix => value } + if value.prefix != "xmlns" and old_attr.prefix != "xmlns" + old_namespace = old_attr.namespace + new_namespace = value.namespace + if old_namespace == new_namespace + raise ParseException.new( + "Namespace conflict in adding attribute \"#{value.name}\": "+ + "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+ + "prefix \"#{value.prefix}\" = \"#{new_namespace}\"") + end + end + store 
value.name, {old_attr.prefix => old_attr, + value.prefix => value} else store value.name, value end return @element end - # Returns an array of Strings containing all of the prefixes declared - # by this set of # attributes. The array does not include the default + # :call-seq: + # prefixes -> array_of_prefix_strings + # + # Returns an array of prefix strings in the attributes. + # The array does not include the default # namespace declaration, if one exists. - # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+ - # "z='glorp' p:k='gru'/>") - # prefixes = doc.root.attributes.prefixes #-> ['x', 'y'] + # + # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>' + # d = REXML::Document.new(xml_string) + # d.root.attributes.prefixes # => ["x", "y"] + # def prefixes ns = [] each_attribute do |attribute| @@ -1168,6 +2421,15 @@ module REXML ns end + # :call-seq: + # namespaces + # + # Returns a hash of name/value pairs for the namespaces: + # + # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>' + # d = REXML::Document.new(xml_string) + # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"} + # def namespaces namespaces = {} each_attribute do |attribute| @@ -1184,16 +2446,34 @@ module REXML namespaces end - # Removes an attribute - # attribute:: - # either a String, which is the name of the attribute to remove -- - # namespaces are significant here -- or the attribute to remove. - # Returns:: the owning element - # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>" - # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>" - # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>" - # attr = doc.root.attributes.get_attribute('y:foo') - # doc.root.attributes.delete attr #-> <a z:foo='4'/>" + # :call-seq: + # delete(name) -> element + # delete(attribute) -> element + # + # Removes a specified attribute if it exists; + # returns the attributes' element. 
+ # + # When string argument +name+ is given, + # removes the attribute of that name if it exists: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/> + # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/> + # + # When attribute argument +attribute+ is given, + # removes that attribute if it exists: + # + # attr = REXML::Attribute.new('bar:att', '2') + # attrs.delete(attr) # => <ele att='<'/> # => <ele att='<'/> + # attrs.delete(attr) # => <ele att='<'/> # => <ele/> + # def delete( attribute ) name = nil prefix = nil @@ -1221,19 +2501,48 @@ module REXML @element end - # Adds an attribute, overriding any existing attribute by the - # same name. Namespaces are significant. - # attribute:: An Attribute + # :call-seq: + # add(attribute) -> attribute + # + # Adds attribute +attribute+, replacing the previous + # attribute of the same name if it exists; + # returns +attribute+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs # => {"att"=>{"foo"=>foo:att='1', "bar"=>bar:att='2', ""=>att='<'}} + # attrs.add(REXML::Attribute.new('foo:att', '2')) # => foo:att='2' + # attrs.add(REXML::Attribute.new('baz', '3')) # => baz='3' + # attrs.include?('baz') # => true + # def add( attribute ) self[attribute.name] = attribute end alias :<< :add - # Deletes all attributes matching a name. Namespaces are significant. 
- # name:: - # A String; all attributes that match this path will be removed - # Returns:: an Array of the Attributes that were removed + # :call-seq: + # delete_all(name) -> array_of_removed_attributes + # + # Removes all attributes matching the given +name+; + # returns an array of the removed attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.delete_all('att') # => [att='<'] + # def delete_all( name ) rv = [] each_attribute { |attribute| @@ -1243,11 +2552,23 @@ module REXML return rv end - # The +get_attribute_ns+ method retrieves a method by its namespace - # and name. Thus it is possible to reliably identify an attribute - # even if an XML processor has changed the prefix. + # :call-seq: + # get_attribute_ns(namespace, name) + # + # Returns the \REXML::Attribute object among the attributes + # that matches the given +namespace+ and +name+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.get_attribute_ns('http://foo', 'att') # => foo:att='1' + # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil # - # Method contributed by Henrik Martensson def get_attribute_ns(namespace, name) result = nil each_attribute() { |attribute| diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb index 97c7b6b..573db69 100644 --- a/lib/rexml/entity.rb +++ b/lib/rexml/entity.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require 'rexml/child' -require 'rexml/source' -require 'rexml/xmltokens' +require_relative 'child' +require_relative 'source' +require_relative 
'xmltokens' module REXML class Entity < Child @@ -90,7 +90,7 @@ module REXML # object itself is valid.) # # out:: - # An object implementing <TT><<<TT> to which the entity will be + # An object implementing <TT><<</TT> to which the entity will be # output # indent:: # *DEPRECATED* and ignored @@ -132,24 +132,34 @@ module REXML # then: # doctype.entity('yada').value #-> "nanoo bar nanoo" def value - if @value - matches = @value.scan(PEREFERENCE_RE) - rv = @value.clone - if @parent - sum = 0 - matches.each do |entity_reference| - entity_value = @parent.entity( entity_reference[0] ) - if sum + entity_value.bytesize > Security.entity_expansion_text_limit - raise "entity expansion has grown too large" - else - sum += entity_value.bytesize - end - rv.gsub!( /%#{entity_reference.join};/um, entity_value ) + @resolved_value ||= resolve_value + end + + def parent=(other) + @resolved_value = nil + super + end + + private + def resolve_value + return nil if @value.nil? + return @value unless @value.match?(PEREFERENCE_RE) + + matches = @value.scan(PEREFERENCE_RE) + rv = @value.clone + if @parent + sum = 0 + matches.each do |entity_reference| + entity_value = @parent.entity( entity_reference[0] ) + if sum + entity_value.bytesize > Security.entity_expansion_text_limit + raise "entity expansion has grown too large" + else + sum += entity_value.bytesize end + rv.gsub!( /%#{entity_reference.join};/um, entity_value ) end - return rv end - nil + rv end end diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb index c375f14..811b2ff 100644 --- a/lib/rexml/formatters/default.rb +++ b/lib/rexml/formatters/default.rb @@ -1,4 +1,5 @@ # frozen_string_literal: false + module REXML module Formatters class Default @@ -101,11 +102,14 @@ module REXML end def write_instruction( node, output ) - output << Instruction::START.sub(/\\/u, '') + output << Instruction::START output << node.target - output << ' ' - output << node.content - output << Instruction::STOP.sub(/\\/u, 
'') + content = node.content + if content + output << ' ' + output << content + end + output << Instruction::STOP end end end diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb index a80274b..a1198b7 100644 --- a/lib/rexml/formatters/pretty.rb +++ b/lib/rexml/formatters/pretty.rb @@ -1,5 +1,5 @@ -# frozen_string_literal: false -require 'rexml/formatters/default' +# frozen_string_literal: true +require_relative 'default' module REXML module Formatters @@ -58,7 +58,7 @@ module REXML skip = false if compact if node.children.inject(true) {|s,c| s & c.kind_of?(Text)} - string = "" + string = +"" old_level = @level @level = 0 node.children.each { |child| write( child, string ) } diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb index 81e67f3..5ff51e1 100644 --- a/lib/rexml/formatters/transitive.rb +++ b/lib/rexml/formatters/transitive.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/formatters/pretty' +require_relative 'pretty' module REXML module Formatters diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index cd879fd..4c11461 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -66,11 +66,11 @@ module REXML def Functions::id( object ) end - # UNTESTED - def Functions::local_name( node_set=nil ) - get_namespace( node_set ) do |node| + def Functions::local_name(node_set=nil) + get_namespace(node_set) do |node| return node.local_name end + "" end def Functions::namespace_uri( node_set=nil ) @@ -86,10 +86,14 @@ module REXML # Helper method. def Functions::get_namespace( node_set = nil ) if node_set == nil - yield @@context[:node] if defined? @@context[:node].namespace + yield @@context[:node] if @@context[:node].respond_to?(:namespace) else if node_set.respond_to? :each - node_set.each { |node| yield node if defined? node.namespace } + result = [] + node_set.each do |node| + result << yield(node) if node.respond_to?(:namespace) + end + result elsif node_set.respond_to? 
:namespace yield node_set end @@ -131,22 +135,38 @@ module REXML # # An object of a type other than the four basic types is converted to a # string in a way that is dependent on that type. - def Functions::string( object=nil ) - #object = @context unless object - if object.instance_of? Array - string( object[0] ) - elsif defined? object.node_type - if object.node_type == :attribute + def Functions::string( object=@@context[:node] ) + if object.respond_to?(:node_type) + case object.node_type + when :attribute object.value - elsif object.node_type == :element || object.node_type == :document + when :element string_value(object) + when :document + string_value(object.root) + when :processing_instruction + object.content else object.to_s end - elsif object.nil? - return "" else - object.to_s + case object + when Array + string(object[0]) + when Float + if object.nan? + "NaN" + else + integer = object.to_i + if object == integer + "%d" % integer + else + object.to_s + end + end + else + object.to_s + end end end @@ -167,9 +187,12 @@ module REXML rv end - # UNTESTED def Functions::concat( *objects ) - objects.join + concatenated = "" + objects.each do |object| + concatenated << string(object) + end + concatenated end # Fixed by Mike Stok @@ -239,11 +262,10 @@ module REXML string(string).length end - # UNTESTED def Functions::normalize_space( string=nil ) string = string(@@context[:node]) if string.nil? if string.kind_of? Array - string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string} + string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x} else string.to_s.strip.gsub(/\s+/um, ' ') end @@ -292,18 +314,23 @@ module REXML end end - # UNTESTED - def Functions::boolean( object=nil ) - if object.kind_of? String - if object =~ /\d+/u - return object.to_f != 0 - else - return object.size > 0 - end - elsif object.kind_of? 
Array - object = object.find{|x| x and true} + def Functions::boolean(object=@@context[:node]) + case object + when true, false + object + when Float + return false if object.zero? + return false if object.nan? + true + when Numeric + not object.zero? + when String + not object.empty? + when Array + not object.empty? + else + object ? true : false end - return object ? true : false end # UNTESTED @@ -357,25 +384,23 @@ module REXML # # an object of a type other than the four basic types is converted to a # number in a way that is dependent on that type - def Functions::number( object=nil ) - object = @@context[:node] unless object + def Functions::number(object=@@context[:node]) case object when true Float(1) when false Float(0) when Array - number(string( object )) + number(string(object)) when Numeric object.to_f else - str = string( object ) - # If XPath ever gets scientific notation... - #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/ - if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/ - str.to_f + str = string(object) + case str.strip + when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/ + $1.to_f else - (0.0 / 0.0) + Float::NAN end end end @@ -397,7 +422,7 @@ module REXML number = number(number) begin neg = number.negative? - number = number.abs.round(half: :up) + number = number.abs.round neg ? -number : number rescue FloatDomainError number diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb index c4f65ee..318741f 100644 --- a/lib/rexml/instruction.rb +++ b/lib/rexml/instruction.rb @@ -1,13 +1,14 @@ # frozen_string_literal: false -require "rexml/child" -require "rexml/source" + +require_relative "child" +require_relative "source" module REXML # Represents an XML Instruction; IE, <? ... ?> # TODO: Add parent arg (3rd arg) to constructor class Instruction < Child - START = '<\?' - STOP = '\?>' + START = "<?" + STOP = "?>" # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?> # content is everything else. 
@@ -17,20 +18,25 @@ module REXML # @param target can be one of a number of things. If String, then # the target of this instruction is set to this. If an Instruction, # then the Instruction is shallowly cloned (target and content are - # copied). If a Source, then the source is scanned and parsed for - # an Instruction declaration. + # copied). # @param content Must be either a String, or a Parent. Can only # be a Parent if the target argument is a Source. Otherwise, this # String is set as the content of this instruction. def initialize(target, content=nil) - if target.kind_of? String + case target + when String super() @target = target @content = content - elsif target.kind_of? Instruction + when Instruction super(content) @target = target.target @content = target.content + else + message = + "processing instruction target must be String or REXML::Instruction: " + message << "<#{target.inspect}>" + raise ArgumentError, message end @content.strip! if @content end @@ -45,11 +51,13 @@ module REXML def write writer, indent=-1, transitive=false, ie_hack=false Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1) indent(writer, indent) - writer << START.sub(/\\/u, '') + writer << START writer << @target - writer << ' ' - writer << @content - writer << STOP.sub(/\\/u, '') + if @content + writer << ' ' + writer << @content + end + writer << STOP end # @return true if other is an Instruction, and the content and target diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb index d58119a..3dab885 100644 --- a/lib/rexml/light/node.rb +++ b/lib/rexml/light/node.rb @@ -1,14 +1,6 @@ # frozen_string_literal: false -require 'rexml/xmltokens' - -# [ :element, parent, name, attributes, children* ] - # a = Node.new - # a << "B" # => <a>B</a> - # a.b # => <a>B<b/></a> - # a.b[1] # => <a>B<b/><b/><a> - # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a> - # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a> - # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a> 
+require_relative '../xmltokens' + module REXML module Light # Represents a tagged XML element. Elements are characterized by diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb index 90ba7cc..2e67252 100644 --- a/lib/rexml/namespace.rb +++ b/lib/rexml/namespace.rb @@ -1,5 +1,6 @@ -# frozen_string_literal: false -require 'rexml/xmltokens' +# frozen_string_literal: true + +require_relative 'xmltokens' module REXML # Adds named attributes to an object. @@ -9,19 +10,33 @@ module REXML # The expanded name of the object, valid if name is set attr_accessor :prefix include XMLTokens + NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u # Sets the name and the expanded name def name=( name ) @expanded_name = name - name =~ NAMESPLIT - if $1 - @prefix = $1 - else + if name.match?(NAME_WITHOUT_NAMESPACE) @prefix = "" @namespace = "" + @name = name + elsif name =~ NAMESPLIT + if $1 + @prefix = $1 + else + @prefix = "" + @namespace = "" + end + @name = $2 + elsif name == "" + @prefix = nil + @namespace = nil + @name = nil + else + message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: " + message += "<#{name.inspect}>" + raise ArgumentError, message end - @name = $2 end # Compares names optionally WITH namespaces diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index 52337ad..c771db7 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require "rexml/parseexception" -require "rexml/formatters/pretty" -require "rexml/formatters/default" +require_relative "parseexception" +require_relative "formatters/pretty" +require_relative "formatters/default" module REXML # Represents a node in the tree. 
Nodes are never encountered except as @@ -52,10 +52,14 @@ module REXML # Visit all subnodes of +self+ recursively def each_recursive(&block) # :yields: node - self.elements.each {|node| - block.call(node) - node.each_recursive(&block) - } + stack = [] + each { |child| stack.unshift child if child.node_type == :element } + until stack.empty? + child = stack.pop + yield child + n = stack.size + child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element } + end end # Find (and return) first subnode (recursively) for which the block diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb index 96dfea5..88a5fb3 100644 --- a/lib/rexml/output.rb +++ b/lib/rexml/output.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/encoding' +require_relative 'encoding' module REXML class Output diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb index 3bd0a96..6a53b37 100644 --- a/lib/rexml/parent.rb +++ b/lib/rexml/parent.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "child" module REXML # A parent has children, and has methods for accessing them. 
The Parent diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb index 7b16cd1..e57d05f 100644 --- a/lib/rexml/parseexception.rb +++ b/lib/rexml/parseexception.rb @@ -29,6 +29,7 @@ module REXML err << "\nLine: #{line}\n" err << "Position: #{position}\n" err << "Last 80 unconsumed characters:\n" + err.force_encoding("ASCII-8BIT") err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ') end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index e7ef695..275372e 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,14 +1,23 @@ -# frozen_string_literal: false - -require "strscan" - -require 'rexml/parseexception' -require 'rexml/undefinednamespaceexception' -require 'rexml/source' +# frozen_string_literal: true +require_relative '../parseexception' +require_relative '../undefinednamespaceexception' +require_relative '../source' require 'set' +require "strscan" module REXML module Parsers + if StringScanner::Version < "3.0.8" + module StringScannerCaptures + refine StringScanner do + def captures + values_at(*(1...size)) + end + end + end + using StringScannerCaptures + end + # = Using the Pull Parser # <em>This API is experimental, and subject to change.</em> # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) @@ -61,7 +70,7 @@ module REXML XMLDECL_START = /\A<\?xml\s/u; XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u - INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um + INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um TAG_MATCH = /\A<((?>#{QNAME_STR}))/um CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um @@ -98,7 +107,7 @@ module REXML ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" - ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um NOTATIONDECL_START = /\A\s*<!NOTATION/um 
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um @@ -114,16 +123,29 @@ module REXML "apos" => [/'/, "'", "'", /'/] } - - ###################################################################### - # These are patterns to identify common markup errors, to make the - # error messages more informative. - ###################################################################### - MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um + module Private + INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um + TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um + ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NAME_PATTERN = /\s*#{NAME}/um + GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" + PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" + ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um + CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/ + CHARACTER_REFERENCES = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ + DEFAULT_ENTITIES_PATTERNS = {} + default_entities = ['gt', 'lt', 'quot', 'apos', 'amp'] + default_entities.each do |term| + DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/ + end + end + private_constant :Private def initialize( source ) self.stream = source @listeners = [] + @prefixes = Set.new end def add_listener( listener ) @@ -189,6 +211,8 @@ module REXML # Returns the next event. This is a +PullEvent+ object. def pull + @source.drop_parsed_content + pull_event.tap do |event| @listeners.each do |listener| listener.receive event @@ -201,248 +225,257 @@ module REXML x, @closed = @closed, nil return [ :end_element, x ] end - return [ :end_document ] if empty? + if empty? 
+ if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", @source) + end + return [ :end_document ] + end return @stack.shift if @stack.size > 0 #STDERR.puts @source.encoding #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + + @source.ensure_buffer if @document_status == nil - word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) - word = word[1] unless word.nil? - #STDERR.puts "WORD = #{word.inspect}" - case word - when COMMENT_START - return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] - when XMLDECL_START - #STDERR.puts "XMLDECL" - results = @source.match( XMLDECL_PATTERN, true )[1] - version = VERSION.match( results ) - version = version[1] unless version.nil? - encoding = ENCODING.match(results) - encoding = encoding[1] unless encoding.nil? - if need_source_encoding_update?(encoding) - @source.encoding = encoding - end - if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding - encoding = "UTF-16" - end - standalone = STANDALONE.match(results) - standalone = standalone[1] unless standalone.nil? 
- return [ :xmldecl, version, encoding, standalone ] - when INSTRUCTION_START - return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] - when DOCTYPE_START - base_error_message = "Malformed DOCTYPE" - @source.match(DOCTYPE_START, true) - @nsstack.unshift(curr_ns=Set.new) - name = parse_name(base_error_message) - if @source.match(/\A\s*\[/um, true) - id = [nil, nil, nil] - @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) - id = [nil, nil, nil] - @document_status = :after_doctype - else - id = parse_id(base_error_message, - accept_external_id: true, - accept_public_id: false) - if id[0] == "SYSTEM" - # For backward compatibility - id[1], id[2] = id[2], nil + start_position = @source.position + if @source.match("<?", true) + return process_instruction(start_position) + elsif @source.match("<!", true) + if @source.match("--", true) + md = @source.match(/(.*?)-->/um, true) + if md.nil? + raise REXML::ParseException.new("Unclosed comment", @source) + end + if /--|-\z/.match?(md[1]) + raise REXML::ParseException.new("Malformed comment", @source) + end + return [ :comment, md[1] ] + elsif @source.match("DOCTYPE", true) + base_error_message = "Malformed DOCTYPE" + unless @source.match(/\s+/um, true) + if @source.match(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" + end + @source.position = start_position + raise REXML::ParseException.new(message, @source) end - if @source.match(/\A\s*\[/um, true) - @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) + @nsstack.unshift(Set.new) + name = parse_name(base_error_message) + if @source.match(/\s*\[/um, true) + id = [nil, nil, nil] + @document_status = :in_doctype + elsif @source.match(/\s*>/um, true) + id = [nil, nil, nil] @document_status = :after_doctype + @source.ensure_buffer else - message = "#{base_error_message}: garbage after external ID" - raise REXML::ParseException.new(message, 
@source) + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: false) + if id[0] == "SYSTEM" + # For backward compatibility + id[1], id[2] = id[2], nil + end + if @source.match(/\s*\[/um, true) + @document_status = :in_doctype + elsif @source.match(/\s*>/um, true) + @document_status = :after_doctype + @source.ensure_buffer + else + message = "#{base_error_message}: garbage after external ID" + raise REXML::ParseException.new(message, @source) + end end - end - args = [:start_doctype, name, *id] - if @document_status == :after_doctype - @source.match(/\A\s*/um, true) - @stack << [ :end_doctype ] - end - return args - when /\A\s+/ - else - @document_status = :after_doctype - if @source.encoding == "UTF-8" - @source.buffer.force_encoding(::Encoding::UTF_8) + args = [:start_doctype, name, *id] + if @document_status == :after_doctype + @source.match(/\s*/um, true) + @stack << [ :end_doctype ] + end + return args + else + message = "Invalid XML" + raise REXML::ParseException.new(message, @source) end end end if @document_status == :in_doctype - md = @source.match(/\A\s*(.*?>)/um) - case md[1] - when SYSTEMENTITY - match = @source.match( SYSTEMENTITY, true )[1] - return [ :externalentity, match ] - - when ELEMENTDECL_START - return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] - - when ENTITY_START - match = @source.match( ENTITYDECL, true ).to_a.compact - match[0] = :entitydecl - ref = false - if match[1] == '%' - ref = true - match.delete_at 1 - end - # Now we have to sort out what kind of entity reference this is - if match[2] == 'SYSTEM' - # External reference - match[3] = match[3][1..-2] # PUBID - match.delete_at(4) if match.size > 4 # Chop out NDATA decl - # match is [ :entity, name, SYSTEM, pubid(, ndata)? 
] - elsif match[2] == 'PUBLIC' - # External reference - match[3] = match[3][1..-2] # PUBID - match[4] = match[4][1..-2] # HREF - match.delete_at(5) if match.size > 5 # Chop out NDATA decl - # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] - else - match[2] = match[2][1..-2] - match.pop if match.size == 4 - # match is [ :entity, name, value ] - end - match << '%' if ref - return match - when ATTLISTDECL_START - md = @source.match( ATTLISTDECL_PATTERN, true ) - raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? - element = md[1] - contents = md[0] - - pairs = {} - values = md[0].scan( ATTDEF_RE ) - values.each do |attdef| - unless attdef[3] == "#IMPLIED" - attdef.compact! - val = attdef[3] - val = attdef[4] if val == "#FIXED " - pairs[attdef[0]] = val - if attdef[0] =~ /^xmlns:(.*)/ - @nsstack[0] << $1 - end + @source.match(/\s*/um, true) # skip spaces + start_position = @source.position + if @source.match("<!", true) + if @source.match("ELEMENT", true) + md = @source.match(/(.*?)>/um, true) + raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? + return [ :elementdecl, "<!ELEMENT" + md[1] ] + elsif @source.match("ENTITY", true) + match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact] + ref = false + if match[1] == '%' + ref = true + match.delete_at 1 end - end - return [ :attlistdecl, element, pairs, contents ] - when NOTATIONDECL_START - base_error_message = "Malformed notation declaration" - unless @source.match(/\A\s*<!NOTATION\s+/um, true) - if @source.match(/\A\s*<!NOTATION\s*>/um) - message = "#{base_error_message}: name is missing" + # Now we have to sort out what kind of entity reference this is + if match[2] == 'SYSTEM' + # External reference + match[3] = match[3][1..-2] # PUBID + match.delete_at(4) if match.size > 4 # Chop out NDATA decl + # match is [ :entity, name, SYSTEM, pubid(, ndata)? 
] + elsif match[2] == 'PUBLIC' + # External reference + match[3] = match[3][1..-2] # PUBID + match[4] = match[4][1..-2] # HREF + match.delete_at(5) if match.size > 5 # Chop out NDATA decl + # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] else - message = "#{base_error_message}: invalid declaration name" + match[2] = match[2][1..-2] + match.pop if match.size == 4 + # match is [ :entity, name, value ] end - raise REXML::ParseException.new(message, @source) - end - name = parse_name(base_error_message) - id = parse_id(base_error_message, - accept_external_id: true, - accept_public_id: true) - unless @source.match(/\A\s*>/um, true) - message = "#{base_error_message}: garbage before end >" - raise REXML::ParseException.new(message, @source) + match << '%' if ref + return match + elsif @source.match("ATTLIST", true) + md = @source.match(Private::ATTLISTDECL_END, true) + raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? + element = md[1] + contents = md[0] + + pairs = {} + values = md[0].scan( ATTDEF_RE ) + values.each do |attdef| + unless attdef[3] == "#IMPLIED" + attdef.compact! 
+ val = attdef[3] + val = attdef[4] if val == "#FIXED " + pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end + end + end + return [ :attlistdecl, element, pairs, contents ] + elsif @source.match("NOTATION", true) + base_error_message = "Malformed notation declaration" + unless @source.match(/\s+/um, true) + if @source.match(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" + end + @source.position = start_position + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) + unless @source.match(/\s*>/um, true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end + return [:notationdecl, name, *id] + elsif md = @source.match(/--(.*?)-->/um, true) + case md[1] + when /--/, /-\z/ + raise REXML::ParseException.new("Malformed comment", @source) + end + return [ :comment, md[1] ] if md end - return [:notationdecl, name, *id] - when DOCTYPE_END + elsif match = @source.match(/(%.*?;)\s*/um, true) + return [ :externalentity, match[1] ] + elsif @source.match(/\]\s*>/um, true) @document_status = :after_doctype - @source.match( DOCTYPE_END, true ) return [ :end_doctype ] end + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) + end end if @document_status == :after_doctype - @source.match(/\A\s*/um, true) + @source.match(/\s*/um, true) end begin - @source.read if @source.buffer.size<2 - if @source.buffer[0] == ?< - if @source.buffer[1] == ?/ + start_position = @source.position + if @source.match("<", true) + # :text's read_until may remain only "<" in buffer. In the + # case, buffer is empty here. So we need to fill buffer + # here explicitly. 
+ @source.ensure_buffer + if @source.match("/", true) @nsstack.shift last_tag = @tags.pop - #md = @source.match_to_consume( '>', CLOSE_MATCH) - md = @source.match( CLOSE_MATCH, true ) + md = @source.match(Private::CLOSE_PATTERN, true) if md and !last_tag message = "Unexpected top-level end tag (got '#{md[1]}')" raise REXML::ParseException.new(message, @source) end if md.nil? or last_tag != md[1] message = "Missing end tag for '#{last_tag}'" - message << " (got '#{md[1]}')" if md + message += " (got '#{md[1]}')" if md + @source.position = start_position if md.nil? raise REXML::ParseException.new(message, @source) end return [ :end_element, last_tag ] - elsif @source.buffer[1] == ?! - md = @source.match(/\A(\s*[^>]*>)/um) + elsif @source.match("!", true) + md = @source.match(/([^>]*>)/um) #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md - if md[0][2] == ?- - md = @source.match( COMMENT_PATTERN, true ) + if md[0][0] == ?- + md = @source.match(/--(.*?)-->/um, true) - case md[1] - when /--/, /-\z/ + if md.nil? || /--|-\z/.match?(md[1]) raise REXML::ParseException.new("Malformed comment", @source) end - return [ :comment, md[1] ] if md + return [ :comment, md[1] ] else - md = @source.match( CDATA_PATTERN, true ) + md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) return [ :cdata, md[1] ] if md end raise REXML::ParseException.new( "Declarations can only occur "+ "in the doctype declaration.", @source) - elsif @source.buffer[1] == ?? 
- md = @source.match( INSTRUCTION_PATTERN, true ) - return [ :processing_instruction, md[1], md[2] ] if md - raise REXML::ParseException.new( "Bad instruction declaration", - @source) + elsif @source.match("?", true) + return process_instruction(start_position) else # Get the next tag - md = @source.match(TAG_MATCH, true) + md = @source.match(Private::TAG_PATTERN, true) unless md - # Check for missing attribute quotes - raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) + @source.position = start_position raise REXML::ParseException.new("malformed XML: missing tag start", @source) end + tag = md[1] @document_status = :in_element - prefixes = Set.new - prefixes << md[2] if md[2] + @prefixes.clear + @prefixes << md[2] if md[2] @nsstack.unshift(curr_ns=Set.new) - attributes, closed = parse_attributes(prefixes, curr_ns) + attributes, closed = parse_attributes(@prefixes, curr_ns) # Verify that all of the prefixes have been defined - for prefix in prefixes + for prefix in @prefixes unless @nsstack.find{|k| k.member?(prefix)} raise UndefinedNamespaceException.new(prefix,@source,self) end end if closed - @closed = md[1] + @closed = tag @nsstack.shift else - @tags.push( md[1] ) + @tags.push( tag ) end - return [ :start_element, md[1], attributes ] + return [ :start_element, tag, attributes ] end else - md = @source.match( TEXT_PATTERN, true ) - if md[0].length == 0 - @source.match( /(\s+)/, true ) + text = @source.read_until("<") + if text.chomp!("<") + @source.position -= "<".bytesize end - #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 - #return [ :text, "" ] if md[0].length == 0 - # unnormalized = Text::unnormalize( md[1], self ) - # return PullEvent.new( :text, md[1], unnormalized ) - return [ :text, md[1] ] + return [ :text, text ] end rescue REXML::UndefinedNamespaceException raise rescue REXML::ParseException raise - rescue Exception, NameError => error + rescue => error raise 
REXML::ParseException.new( "Exception parsing", @source, self, (error ? error : $!) ) end @@ -478,11 +511,10 @@ module REXML # Unescapes all possible entities def unnormalize( string, entities=nil, filter=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) + rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" ) matches = rv.scan( REFERENCE_RE ) return rv if matches.size == 0 - rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { + rv.gsub!( Private::CHARACTER_REFERENCES ) { m=$1 m = "0#{m}" if m[0] == ?x [Integer(m)].pack('U*') @@ -493,7 +525,7 @@ module REXML unless filter and filter.include?(entity_reference) entity_value = entity( entity_reference, entities ) if entity_value - re = /&#{entity_reference};/ + re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) else er = DEFAULT_ENTITIES[entity_reference] @@ -501,7 +533,7 @@ module REXML end end end - rv.gsub!( /&/, '&' ) + rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' ) end rv end @@ -514,9 +546,9 @@ module REXML end def parse_name(base_error_message) - md = @source.match(/\A\s*#{NAME}/um, true) + md = @source.match(Private::NAME_PATTERN, true) unless md - if @source.match(/\A\s*\S/um) + if @source.match(/\s*\S/um) message = "#{base_error_message}: invalid name" else message = "#{base_error_message}: name is missing" @@ -592,88 +624,91 @@ module REXML end end - def parse_attributes(prefixes, curr_ns) - attributes = {} - closed = false - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data.nil? - message = "Start tag isn't ended" + def process_instruction(start_position) + match_data = @source.match(Private::INSTRUCTION_END, true) + unless match_data + message = "Invalid processing instruction node" + @source.position = start_position raise REXML::ParseException.new(message, @source) end - - raw_attributes = match_data[1] - closed = !match_data[2].nil? - return attributes, closed if raw_attributes.nil? 
- return attributes, closed if raw_attributes.empty? - - scanner = StringScanner.new(raw_attributes) - until scanner.eos? - if scanner.scan(/\s+/) - break if scanner.eos? + if @document_status.nil? and match_data[1] == "xml" + content = match_data[2] + version = VERSION.match(content) + version = version[1] unless version.nil? + encoding = ENCODING.match(content) + encoding = encoding[1] unless encoding.nil? + if need_source_encoding_update?(encoding) + @source.encoding = encoding + end + if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding + encoding = "UTF-16" end + standalone = STANDALONE.match(content) + standalone = standalone[1] unless standalone.nil? + return [ :xmldecl, version, encoding, standalone ] + end + [:processing_instruction, match_data[1], match_data[2]] + end - pos = scanner.pos - loop do - break if scanner.scan(ATTRIBUTE_PATTERN) - unless scanner.scan(QNAME) - message = "Invalid attribute name: <#{scanner.rest}>" - raise REXML::ParseException.new(message, @source) - end - name = scanner[0] - unless scanner.scan(/\s*=\s*/um) + def parse_attributes(prefixes, curr_ns) + attributes = {} + closed = false + while true + if @source.match(">", true) + return attributes, closed + elsif @source.match("/>", true) + closed = true + return attributes, closed + elsif match = @source.match(QNAME, true) + name = match[1] + prefix = match[2] + local_part = match[3] + + unless @source.match(/\s*=\s*/um, true) message = "Missing attribute equal: <#{name}>" raise REXML::ParseException.new(message, @source) end - quote = scanner.scan(/['"]/) - unless quote + unless match = @source.match(/(['"])/, true) message = "Missing attribute value start quote: <#{name}>" raise REXML::ParseException.new(message, @source) end - unless scanner.scan(/.*#{Regexp.escape(quote)}/um) - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data - scanner << "/" if closed - scanner << ">" - scanner << match_data[1] - scanner.pos = pos - closed = !match_data[2].nil? 
- next - end - message = - "Missing attribute value end quote: <#{name}>: <#{quote}>" + quote = match[1] + start_position = @source.position + value = @source.read_until(quote) + unless value.chomp!(quote) + @source.position = start_position + message = "Missing attribute value end quote: <#{name}>: <#{quote}>" raise REXML::ParseException.new(message, @source) end - end - name = scanner[1] - prefix = scanner[2] - local_part = scanner[3] - # quote = scanner[4] - value = scanner[5] - if prefix == "xmlns" - if local_part == "xml" - if value != "http://www.w3.org/XML/1998/namespace" - msg = "The 'xml' prefix must not be bound to any other namespace "+ + @source.match(/\s*/um, true) + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif local_part == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self ) + raise REXML::ParseException.new( msg, @source, self) end - elsif local_part == "xmlns" - msg = "The 'xmlns' prefix must not be declared "+ - "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self) + curr_ns << local_part + elsif prefix + prefixes << prefix unless prefix == "xml" end - curr_ns << local_part - elsif prefix - prefixes << prefix unless prefix == "xml" - end - if attributes.has_key?(name) - msg = "Duplicate attribute #{name.inspect}" - raise REXML::ParseException.new(msg, @source, self) - end + if attributes[name] + msg = "Duplicate attribute #{name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end - attributes[name] = value + attributes[name] = value + else + message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" + raise 
REXML::ParseException.new(message, @source) + end end - return attributes, closed end end end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index f0601ae..bdc0827 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require 'rexml/parsers/streamparser' -require 'rexml/parsers/baseparser' -require 'rexml/light/node' +require_relative 'streamparser' +require_relative 'baseparser' +require_relative '../light/node' module REXML module Parsers diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index 8c49217..f8b232a 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -1,9 +1,9 @@ # frozen_string_literal: false require 'forwardable' -require 'rexml/parseexception' -require 'rexml/parsers/baseparser' -require 'rexml/xmltokens' +require_relative '../parseexception' +require_relative 'baseparser' +require_relative '../xmltokens' module REXML module Parsers diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 1386f69..6a24ce2 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -1,8 +1,8 @@ # frozen_string_literal: false -require 'rexml/parsers/baseparser' -require 'rexml/parseexception' -require 'rexml/namespace' -require 'rexml/text' +require_relative 'baseparser' +require_relative '../parseexception' +require_relative '../namespace' +require_relative '../text' module REXML module Parsers diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index f6a8bfa..9e0eb0b 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/parsers/baseparser" +require_relative "baseparser" module REXML module Parsers diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index fc0993c..0cb6f7c 100644 --- 
a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/validation/validationexception' -require 'rexml/undefinednamespaceexception' +require_relative '../validation/validationexception' +require_relative '../undefinednamespaceexception' module REXML module Parsers @@ -16,7 +16,6 @@ module REXML def parse tag_stack = [] - in_doctype = false entities = nil begin while true @@ -39,17 +38,15 @@ module REXML tag_stack.pop @build_context = @build_context.parent when :text - if not in_doctype - if @build_context[-1].instance_of? Text - @build_context[-1] << event[1] - else - @build_context.add( - Text.new(event[1], @build_context.whitespace, nil, true) - ) unless ( - @build_context.ignore_whitespace_nodes and - event[1].strip.size==0 - ) - end + if @build_context[-1].instance_of? Text + @build_context[-1] << event[1] + else + @build_context.add( + Text.new(event[1], @build_context.whitespace, nil, true) + ) unless ( + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 + ) end when :comment c = Comment.new( event[1] ) @@ -60,14 +57,12 @@ module REXML when :processing_instruction @build_context.add( Instruction.new( event[1], event[2] ) ) when :end_doctype - in_doctype = false entities.each { |k,v| entities[k] = @build_context.entities[k].value } @build_context = @build_context.parent when :start_doctype doctype = DocType.new( event[1..-1], @build_context ) @build_context = doctype entities = {} - in_doctype = true when :attlistdecl n = AttlistDecl.new( event[1..-1] ) @build_context.add( n ) diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb index 6571d11..e0029f4 100644 --- a/lib/rexml/parsers/ultralightparser.rb +++ b/lib/rexml/parsers/ultralightparser.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/parsers/streamparser' -require 'rexml/parsers/baseparser' +require_relative 'streamparser' +require_relative 'baseparser' 
module REXML module Parsers diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 32b70bb..bd3b685 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -1,6 +1,7 @@ # frozen_string_literal: false -require 'rexml/namespace' -require 'rexml/xmltokens' + +require_relative '../namespace' +require_relative '../xmltokens' module REXML module Parsers @@ -22,7 +23,13 @@ module REXML path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces path.gsub!( /\s+([\]\)])/, '\1') parsed = [] - OrExpr(path, parsed) + rest = OrExpr(path, parsed) + if rest + unless rest.strip.empty? + raise ParseException.new("Garbage component exists at the end: " + + "<#{rest}>: <#{path}>") + end + end parsed end @@ -32,108 +39,143 @@ module REXML parsed end - def abbreviate( path ) - path = path.kind_of?(String) ? parse( path ) : path - string = "" - document = false - while path.size > 0 - op = path.shift + def abbreviate(path_or_parsed) + if path_or_parsed.kind_of?(String) + parsed = parse(path_or_parsed) + else + parsed = path_or_parsed + end + components = [] + component = nil + while parsed.size > 0 + op = parsed.shift case op when :node + component << "node()" when :attribute - string << "/" if string.size > 0 - string << "@" + component = "@" + components << component when :child - string << "/" if string.size > 0 + component = "" + components << component when :descendant_or_self - string << "/" + next_op = parsed[0] + if next_op == :node + parsed.shift + component = "" + components << component + else + component = "descendant-or-self::" + components << component + end when :self - string << "." + next_op = parsed[0] + if next_op == :node + parsed.shift + components << "." + else + component = "self::" + components << component + end when :parent - string << ".." + next_op = parsed[0] + if next_op == :node + parsed.shift + components << ".." 
+ else + component = "parent::" + components << component + end when :any - string << "*" + component << "*" when :text - string << "text()" + component << "text()" when :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :namespace, :preceding, :preceding_sibling - string << "/" unless string.size == 0 - string << op.to_s.tr("_", "-") - string << "::" + component = op.to_s.tr("_", "-") << "::" + components << component when :qname - prefix = path.shift - name = path.shift - string << prefix+":" if prefix.size > 0 - string << name + prefix = parsed.shift + name = parsed.shift + component << prefix+":" if prefix.size > 0 + component << name when :predicate - string << '[' - string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } - string << ']' + component << '[' + component << predicate_to_path(parsed.shift) {|x| abbreviate(x)} + component << ']' when :document - document = true + components << "" when :function - string << path.shift - string << "( " - string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} - string << " )" + component << parsed.shift + component << "( " + component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)} + component << " )" when :literal - string << %Q{ "#{path.shift}" } + component << quote_literal(parsed.shift) else - string << "/" unless string.size == 0 - string << "UNKNOWN(" - string << op.inspect - string << ")" + component << "UNKNOWN(" + component << op.inspect + component << ")" end end - string = "/"+string if document - return string + case components + when [""] + "/" + when ["", ""] + "//" + else + components.join("/") + end end - def expand( path ) - path = path.kind_of?(String) ? 
parse( path ) : path - string = "" + def expand(path_or_parsed) + if path_or_parsed.kind_of?(String) + parsed = parse(path_or_parsed) + else + parsed = path_or_parsed + end + path = "" document = false - while path.size > 0 - op = path.shift + while parsed.size > 0 + op = parsed.shift case op when :node - string << "node()" + path << "node()" when :attribute, :child, :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, :namespace, :preceding, :preceding_sibling, :self, :parent - string << "/" unless string.size == 0 - string << op.to_s.tr("_", "-") - string << "::" + path << "/" unless path.size == 0 + path << op.to_s.tr("_", "-") + path << "::" when :any - string << "*" + path << "*" when :qname - prefix = path.shift - name = path.shift - string << prefix+":" if prefix.size > 0 - string << name + prefix = parsed.shift + name = parsed.shift + path << prefix+":" if prefix.size > 0 + path << name when :predicate - string << '[' - string << predicate_to_string( path.shift ) { |x| expand(x) } - string << ']' + path << '[' + path << predicate_to_path( parsed.shift ) { |x| expand(x) } + path << ']' when :document document = true else - string << "/" unless string.size == 0 - string << "UNKNOWN(" - string << op.inspect - string << ")" + path << "UNKNOWN(" + path << op.inspect + path << ")" end end - string = "/"+string if document - return string + path = "/"+path if document + path end - def predicate_to_string( path, &block ) - string = "" - case path[0] + def predicate_to_path(parsed, &block) + path = "" + case parsed[0] when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union - op = path.shift + op = parsed.shift case op when :eq op = "=" @@ -150,42 +192,56 @@ module REXML when :union op = "|" end - left = predicate_to_string( path.shift, &block ) - right = predicate_to_string( path.shift, &block ) - string << " " - string << left - string << " " - string << op.to_s - string << " " - string 
<< right - string << " " + left = predicate_to_path( parsed.shift, &block ) + right = predicate_to_path( parsed.shift, &block ) + path << left + path << " " + path << op.to_s + path << " " + path << right when :function - path.shift - name = path.shift - string << name - string << "( " - string << predicate_to_string( path.shift, &block ) - string << " )" + parsed.shift + name = parsed.shift + path << name + path << "(" + parsed.shift.each_with_index do |argument, i| + path << ", " if i > 0 + path << predicate_to_path(argument, &block) + end + path << ")" when :literal - path.shift - string << " " - string << path.shift.inspect - string << " " + parsed.shift + path << quote_literal(parsed.shift) else - string << " " - string << yield( path ) - string << " " + path << yield( parsed ) end - return string.squeeze(" ") + return path.squeeze(" ") end + # For backward compatibility + alias_method :preciate_to_string, :predicate_to_path private + def quote_literal( literal ) + case literal + when String + # XPath 1.0 does not support escape characters. + # Assumes literal does not contain both single and double quotes. + if literal.include?("'") + "\"#{literal}\"" + else + "'#{literal}'" + end + else + literal.inspect + end + end + #LocationPath # | RelativeLocationPath # | '/' RelativeLocationPath? # | '//' RelativeLocationPath def LocationPath path, parsed - path = path.strip + path = path.lstrip if path[0] == ?/ parsed << :document if path[1] == ?/ @@ -209,7 +265,12 @@ module REXML # | RelativeLocationPath '//' Step AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ def RelativeLocationPath path, parsed - while path.size > 0 + loop do + original_path = path + path = path.lstrip + + return original_path if path.empty? + # (axis or @ or <child::>) nodetest predicate > # OR > / Step # (. or ..) 
> @@ -224,43 +285,44 @@ module REXML path = path[1..-1] end else + path_before_axis_specifier = path + parsed_not_abberviated = [] if path[0] == ?@ - parsed << :attribute + parsed_not_abberviated << :attribute path = path[1..-1] # Goto Nodetest elsif path =~ AXIS - parsed << $1.tr('-','_').intern + parsed_not_abberviated << $1.tr('-','_').intern path = $' # Goto Nodetest else - parsed << :child + parsed_not_abberviated << :child end - n = [] - path = NodeTest( path, n) - - if path[0] == ?[ - path = Predicate( path, n ) + path_before_node_test = path + path = NodeTest(path, parsed_not_abberviated) + if path == path_before_node_test + return path_before_axis_specifier end + path = Predicate(path, parsed_not_abberviated) - parsed.concat(n) + parsed.concat(parsed_not_abberviated) end - if path.size > 0 - if path[0] == ?/ - if path[1] == ?/ - parsed << :descendant_or_self - parsed << :node - path = path[2..-1] - else - path = path[1..-1] - end - else - return path - end + original_path = path + path = path.lstrip + return original_path if path.empty? 
+ + return original_path if path[0] != ?/ + + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] end end - return path end # Returns a 1-1 map of the nodeset @@ -269,15 +331,26 @@ module REXML # String, if a name match #NodeTest # | ('*' | NCNAME ':' '*' | QNAME) NameTest - # | NODE_TYPE '(' ')' NodeType + # | '*' ':' NCNAME NameTest since XPath 2.0 + # | NODE_TYPE '(' ')' NodeType # | PI '(' LITERAL ')' PI # | '[' expr ']' Predicate - NCNAMETEST= /^(#{NCNAME_STR}):\*/u + PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u + LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u QNAME = Namespace::NAMESPLIT NODE_TYPE = /^(comment|text|node)\(\s*\)/m PI = /^processing-instruction\(/ def NodeTest path, parsed + original_path = path + path = path.lstrip case path + when PREFIX_WILDCARD + prefix = nil + name = $1 + path = $' + parsed << :qname + parsed << prefix + parsed << name when /^\*/ path = $' parsed << :any @@ -288,7 +361,9 @@ module REXML when PI path = $' literal = nil - if path !~ /^\s*\)/ + if path =~ /^\s*\)/ + path = $' + else path =~ LITERAL literal = $1 path = $' @@ -297,7 +372,7 @@ module REXML end parsed << :processing_instruction parsed << (literal || '') - when NCNAMETEST + when LOCAL_NAME_WILDCARD prefix = $1 path = $' parsed << :namespace @@ -310,13 +385,17 @@ module REXML parsed << :qname parsed << prefix parsed << name + else + path = original_path end return path end # Filters the supplied nodeset on the predicate(s) def Predicate path, parsed - return nil unless path[0] == ?[ + original_path = path + path = path.lstrip + return original_path unless path[0] == ?[ predicates = [] while path[0] == ?[ path, expr = get_group(path) @@ -421,13 +500,13 @@ module REXML rest end - #| AdditiveExpr ('+' | S '-') MultiplicativeExpr + #| AdditiveExpr ('+' | '-') MultiplicativeExpr #| MultiplicativeExpr def AdditiveExpr path, parsed n = [] rest = MultiplicativeExpr( path, n ) if rest != path - while rest =~ 
/^\s*(\+| -)\s*/ + while rest =~ /^\s*(\+|-)\s*/ if $1[0] == ?+ n = [ :plus, n, [] ] else @@ -509,13 +588,14 @@ module REXML #| LocationPath #| FilterExpr ('/' | '//') RelativeLocationPath def PathExpr path, parsed - path =~ /^\s*/ - path = $' + path = path.lstrip n = [] rest = FilterExpr( path, n ) if rest != path if rest and rest[0] == ?/ - return RelativeLocationPath(rest, n) + rest = RelativeLocationPath(rest, n) + parsed.concat(n) + return rest end end rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/ @@ -527,8 +607,10 @@ module REXML #| PrimaryExpr def FilterExpr path, parsed n = [] - path = PrimaryExpr( path, n ) - path = Predicate(path, n) if path and path[0] == ?[ + path_before_primary_expr = path + path = PrimaryExpr(path, n) + return path_before_primary_expr if path == path_before_primary_expr + path = Predicate(path, n) parsed.concat(n) path end diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb index 5d6c77c..a0466b2 100644 --- a/lib/rexml/quickpath.rb +++ b/lib/rexml/quickpath.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/functions' -require 'rexml/xmltokens' +require_relative 'functions' +require_relative 'xmltokens' module REXML class QuickPath diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 92e689b..3af03ec 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -1,31 +1,38 @@ -# -*- encoding: utf-8 -*- +# -*- coding: utf-8 -*- # frozen_string_literal: false -# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby. -# -# REXML is a _pure_ Ruby, XML 1.0 conforming, -# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance] -# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis -# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml], -# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also -# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. 
Since -# Ruby 1.8, REXML is included in the standard Ruby distribution. -# -# Main page:: http://www.germane-software.com/software/rexml -# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> -# Date:: 2008/019 -# Version:: 3.1.7.3 -# -# This API documentation can be downloaded from the REXML home page, or can -# be accessed online[http://www.germane-software.com/software/rexml_doc] -# -# A tutorial is available in the REXML distribution in docs/tutorial.html, -# or can be accessed -# online[http://www.germane-software.com/software/rexml/docs/tutorial.html] +# +# \Module \REXML provides classes and methods for parsing, +# editing, and generating XML. +# +# == Implementation +# +# \REXML: +# - Is pure Ruby. +# - Provides tree, stream, SAX2, pull, and lightweight APIs. +# - Conforms to {XML version 1.0}[https://www.w3.org/TR/REC-xml/]. +# - Fully implements {XPath version 1.0}[http://www.w3c.org/tr/xpath]. +# - Is {non-validating}[https://www.w3.org/TR/xml/]. +# - Passes 100% of the non-validating {Oasis tests}[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml]. +# +# == In a Hurry? +# +# If you're somewhat familiar with XML +# and have a particular task in mind, +# you may want to see {the tasks pages}[doc/rexml/tasks/tocs/master_toc_rdoc.html]. +# +# == API +# +# Among the most important classes for using \REXML are: +# - REXML::Document. +# - REXML::Element. +# +# There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html]. 
+# module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>" DATE = "2008/019" - VERSION = "3.1.7.4" - REVISION = %w$Revision: 53141 $[1] || '' + VERSION = "3.3.1" + REVISION = "" Copyright = COPYRIGHT Version = VERSION diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index af65cf4..5715c35 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -1,8 +1,28 @@ # coding: US-ASCII # frozen_string_literal: false -require 'rexml/encoding' + +require "strscan" + +require_relative 'encoding' module REXML + if StringScanner::Version < "1.0.0" + module StringScannerCheckScanString + refine StringScanner do + def check(pattern) + pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) + super(pattern) + end + + def scan(pattern) + pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) + super(pattern) + end + end + end + using StringScannerCheckScanString + end + # Generates Source-s. USE THIS CLASS. class SourceFactory # Generates a Source object @@ -30,18 +50,27 @@ module REXML # objects and provides consumption of text class Source include Encoding - # The current buffer (what we're going to read next) - attr_reader :buffer # The line number of the last consumed text attr_reader :line attr_reader :encoding + module Private + SCANNER_RESET_SIZE = 100000 + PRE_DEFINED_TERM_PATTERNS = {} + pre_defined_terms = ["'", '"', "<"] + pre_defined_terms.each do |term| + PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ + end + end + private_constant :Private + # Constructor # @param arg must be a String, and should be a valid XML document # @param encoding if non-null, sets the encoding of the source to this # value, overriding all encoding detection def initialize(arg, encoding=nil) - @orig = @buffer = arg + @orig = arg + @scanner = StringScanner.new(@orig) if encoding self.encoding = encoding else @@ -50,6 +79,20 @@ module REXML @line = 0 end + # The current buffer (what we're going to read next) + def buffer + 
@scanner.rest + end + + def drop_parsed_content + if @scanner.pos > Private::SCANNER_RESET_SIZE + @scanner.string = @scanner.rest + end + end + + def buffer_encoding=(encoding) + @scanner.string.force_encoding(encoding) + end # Inherited from Encoding # Overridden to support optimized en/decoding @@ -58,98 +101,78 @@ module REXML encoding_updated end - # Scans the source for a given pattern. Note, that this is not your - # usual scan() method. For one thing, the pattern argument has some - # requirements; for another, the source can be consumed. You can easily - # confuse this method. Originally, the patterns were easier - # to construct and this method more robust, because this method - # generated search regexps on the fly; however, this was - # computationally expensive and slowed down the entire REXML package - # considerably, since this is by far the most commonly called method. - # @param pattern must be a Regexp, and must be in the form of - # /^\s*(#{your pattern, with no groups})(.*)/. The first group - # will be returned; the second group is used if the consume flag is - # set. - # @param consume if true, the pattern returned will be consumed, leaving - # everything after it in the Source. - # @return the pattern, if found, or nil if the Source is empty or the - # pattern is not found. - def scan(pattern, cons=false) - return nil if @buffer.nil? - rv = @buffer.scan(pattern) - @buffer = $' if cons and rv.size>0 - rv + def read(term = nil) end - def read + def read_until(term) + pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + data = @scanner.scan_until(pattern) + unless data + data = @scanner.rest + @scanner.pos = @scanner.string.bytesize + end + data end - def consume( pattern ) - @buffer = $' if pattern.match( @buffer ) + def ensure_buffer end - def match_to( char, pattern ) - return pattern.match(@buffer) + def match(pattern, cons=false) + if cons + @scanner.scan(pattern).nil? ? 
nil : @scanner + else + @scanner.check(pattern).nil? ? nil : @scanner + end end - def match_to_consume( char, pattern ) - md = pattern.match(@buffer) - @buffer = $' - return md + def position + @scanner.pos end - def match(pattern, cons=false) - md = pattern.match(@buffer) - @buffer = $' if cons and md - return md + def position=(pos) + @scanner.pos = pos end # @return true if the Source is exhausted def empty? - @buffer == "" - end - - def position - @orig.index( @buffer ) + @scanner.eos? end # @return the current line in the source def current_line lines = @orig.split - res = lines.grep @buffer[0..30] + res = lines.grep @scanner.rest[0..30] res = res[-1] if res.kind_of? Array lines.index( res ) if res end private + def detect_encoding - buffer_encoding = @buffer.encoding + scanner_encoding = @scanner.rest.encoding detected_encoding = "UTF-8" begin - @buffer.force_encoding("ASCII-8BIT") - if @buffer[0, 2] == "\xfe\xff" - @buffer[0, 2] = "" + @scanner.string.force_encoding("ASCII-8BIT") + if @scanner.scan(/\xfe\xff/n) detected_encoding = "UTF-16BE" - elsif @buffer[0, 2] == "\xff\xfe" - @buffer[0, 2] = "" + elsif @scanner.scan(/\xff\xfe/n) detected_encoding = "UTF-16LE" - elsif @buffer[0, 3] == "\xef\xbb\xbf" - @buffer[0, 3] = "" + elsif @scanner.scan(/\xef\xbb\xbf/n) detected_encoding = "UTF-8" end ensure - @buffer.force_encoding(buffer_encoding) + @scanner.string.force_encoding(scanner_encoding) end self.encoding = detected_encoding end def encoding_updated if @encoding != 'UTF-8' - @buffer = decode(@buffer) + @scanner.string = decode(@scanner.rest) @to_utf = true else @to_utf = false - @buffer.force_encoding ::Encoding::UTF_8 + @scanner.string.force_encoding(::Encoding::UTF_8) end end end @@ -172,7 +195,7 @@ module REXML end if !@to_utf and - @buffer.respond_to?(:force_encoding) and + @orig.respond_to?(:force_encoding) and @source.respond_to?(:external_encoding) and @source.external_encoding != ::Encoding::UTF_8 @force_utf8 = true @@ -181,65 +204,62 @@ module 
REXML end end - def scan(pattern, cons=false) - rv = super - # You'll notice that this next section is very similar to the same - # section in match(), but just a liiittle different. This is - # because it is a touch faster to do it this way with scan() - # than the way match() does it; enough faster to warrant duplicating - # some code - if rv.size == 0 - until @buffer =~ pattern or @source.nil? - begin - @buffer << readline - rescue Iconv::IllegalSequence - raise - rescue - @source = nil - end - end - rv = super - end - rv.taint - rv - end - - def read + def read(term = nil) + term = encode(term) if term begin - @buffer << readline + @scanner << readline(term) + true rescue Exception, NameError @source = nil + false + end + end + + def read_until(term) + pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + term = encode(term) + until str = @scanner.scan_until(pattern) + break if @source.nil? + break if @source.eof? + @scanner << readline(term) + end + if str + read if @scanner.eos? and !@source.eof? + str + else + rest = @scanner.rest + @scanner.pos = @scanner.string.bytesize + rest end end - def consume( pattern ) - match( pattern, true ) + def ensure_buffer + read if @scanner.eos? && @source end + # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats: + # - ">" + # - "XXX>" (X is any string excluding '>') def match( pattern, cons=false ) - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - while !rv and @source - begin - @buffer << readline - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - rescue - @source = nil + while true + if cons + md = @scanner.scan(pattern) + else + md = @scanner.check(pattern) end + break if md + return nil if pattern.is_a?(String) + return nil if @source.nil? + return nil unless read end - rv.taint - rv + + md.nil? ? nil : @scanner end def empty? super and ( @source.nil? || @source.eof? 
) end - def position - @er_source.pos rescue 0 - end - # @return the current line in the source def current_line begin @@ -254,6 +274,7 @@ module REXML end rescue end + @er_source.seek(pos) rescue IOError pos = -1 line = -1 @@ -262,8 +283,8 @@ module REXML end private - def readline - str = @source.readline(@line_break) + def readline(term = nil) + str = @source.readline(term || @line_break) if @pending_buffer if str.nil? str = @pending_buffer @@ -289,7 +310,7 @@ module REXML @source.set_encoding(@encoding, @encoding) end @line_break = encode(">") - @pending_buffer, @buffer = @buffer, "" + @pending_buffer, @scanner.string = @scanner.rest, "" @pending_buffer.force_encoding(@encoding) super end diff --git a/lib/rexml/syncenumerator.rb b/lib/rexml/syncenumerator.rb deleted file mode 100644 index a9d2ad7..0000000 --- a/lib/rexml/syncenumerator.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: false -module REXML - class SyncEnumerator - include Enumerable - - # Creates a new SyncEnumerator which enumerates rows of given - # Enumerable objects. - def initialize(*enums) - @gens = enums - @length = @gens.collect {|x| x.size }.max - end - - # Returns the number of enumerated Enumerable objects, i.e. the size - # of each row. - def size - @gens.size - end - - # Returns the number of enumerated Enumerable objects, i.e. the size - # of each row. - def length - @gens.length - end - - # Enumerates rows of the Enumerable objects. 
- def each - @length.times {|i| - yield @gens.collect {|x| x[i]} - } - self - end - end -end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 86269de..b47bad3 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -1,10 +1,10 @@ -# frozen_string_literal: false -require 'rexml/security' -require 'rexml/entity' -require 'rexml/doctype' -require 'rexml/child' -require 'rexml/doctype' -require 'rexml/parseexception' +# frozen_string_literal: true +require_relative 'security' +require_relative 'entity' +require_relative 'doctype' +require_relative 'child' +require_relative 'doctype' +require_relative 'parseexception' module REXML # Represents text nodes in an XML document @@ -96,27 +96,28 @@ module REXML @raw = false @parent = nil + @entity_filter = nil if parent super( parent ) @raw = parent.raw end - @raw = raw unless raw.nil? - @entity_filter = entity_filter - clear_cache - if arg.kind_of? String @string = arg.dup - @string.squeeze!(" \n\t") unless respect_whitespace elsif arg.kind_of? Text - @string = arg.to_s + @string = arg.instance_variable_get(:@string).dup @raw = arg.raw - elsif + @entity_filter = arg.instance_variable_get(:@entity_filter) + else raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})" end - @string.gsub!( /\r\n?/, "\n" ) + @string.squeeze!(" \n\t") unless respect_whitespace + @string.gsub!(/\r\n?/, "\n") + @raw = raw unless raw.nil? + @entity_filter = entity_filter if entity_filter + clear_cache Text.check(@string, illegal, doctype) if @raw end @@ -130,13 +131,13 @@ module REXML def Text.check string, pattern, doctype # illegal anywhere - if string !~ VALID_XML_CHARS + if !string.match?(VALID_XML_CHARS) if String.method_defined? 
:encode string.chars.each do |c| case c.ord when *VALID_CHAR else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" + raise "Illegal character #{c.inspect} in raw string #{string.inspect}" end end else @@ -144,7 +145,7 @@ module REXML case c.unpack('U') when *VALID_CHAR else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" + raise "Illegal character #{c.inspect} in raw string #{string.inspect}" end end end @@ -153,13 +154,13 @@ module REXML # context sensitive string.scan(pattern) do if $1[-1] != ?; - raise "Illegal character '#{$1}' in raw string \"#{string}\"" + raise "Illegal character #{$1.inspect} in raw string #{string.inspect}" elsif $1[0] == ?& if $5 and $5[0] == ?# case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i) when *VALID_CHAR else - raise "Illegal character '#{$1}' in raw string \"#{string}\"" + raise "Illegal character #{$1.inspect} in raw string #{string.inspect}" end # FIXME: below can't work but this needs API change. # elsif @parent and $3 and !SUBSTITUTES.include?($1) @@ -181,7 +182,7 @@ module REXML def clone - return Text.new(self) + return Text.new(self, true) end @@ -226,9 +227,7 @@ module REXML # u.to_s #-> "sean russell" def to_s return @string if @raw - return @normalized if @normalized - - @normalized = Text::normalize( @string, doctype, @entity_filter ) + @normalized ||= Text::normalize( @string, doctype, @entity_filter ) end def inspect @@ -249,8 +248,7 @@ module REXML # u = Text.new( "sean russell", false, nil, true ) # u.value #-> "sean russell" def value - return @unnormalized if @unnormalized - @unnormalized = Text::unnormalize( @string, doctype ) + @unnormalized ||= Text::unnormalize( @string, doctype ) end # Sets the contents of this text node. This expects the text to be @@ -266,16 +264,16 @@ module REXML @raw = false end - def wrap(string, width, addnewline=false) - # Recursively wrap string at width. 
- return string if string.length <= width - place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) - else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) - end - end + def wrap(string, width, addnewline=false) + # Recursively wrap string at width. + return string if string.length <= width + place = string.rindex(' ', width) # Position in string with last ' ' before cutoff + if addnewline then + return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) + else + return string[0,place] + "\n" + wrap(string[place+1..-1], width) + end + end def indent_text(string, level=1, style="\t", indentfirstline=true) return string if level < 0 @@ -373,7 +371,7 @@ module REXML copy = input.to_s # Doing it like this rather than in a loop improves the speed #copy = copy.gsub( EREFERENCE, '&' ) - copy = copy.gsub( "&", "&" ) + copy = copy.gsub( "&", "&" ) if copy.include?("&") if doctype # Replace all ampersands that aren't part of an entity doctype.entities.each_value do |entity| @@ -384,7 +382,9 @@ module REXML else # Replace all ampersands that aren't part of an entity DocType::DEFAULT_ENTITIES.each_value do |entity| - copy = copy.gsub(entity.value, "&#{entity.name};" ) + if copy.include?(entity.value) + copy = copy.gsub(entity.value, "&#{entity.name};" ) + end end end copy diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb index e522ed5..492a098 100644 --- a/lib/rexml/undefinednamespaceexception.rb +++ b/lib/rexml/undefinednamespaceexception.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/parseexception' +require_relative 'parseexception' module REXML class UndefinedNamespaceException < ParseException def initialize( prefix, source, parser ) diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb index fb52438..f29a2c0 100644 --- 
a/lib/rexml/validation/relaxng.rb +++ b/lib/rexml/validation/relaxng.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require "rexml/validation/validation" -require "rexml/parsers/baseparser" +require_relative "validation" +require_relative "../parsers/baseparser" module REXML module Validation diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb index f0c76f9..0ad6ada 100644 --- a/lib/rexml/validation/validation.rb +++ b/lib/rexml/validation/validation.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/validation/validationexception' +require_relative 'validationexception' module REXML module Validation diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index a37e9f3..d19407c 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -1,17 +1,18 @@ # frozen_string_literal: false -require 'rexml/encoding' -require 'rexml/source' + +require_relative 'encoding' +require_relative 'source' module REXML # NEEDS DOCUMENTATION class XMLDecl < Child include Encoding - DEFAULT_VERSION = "1.0"; - DEFAULT_ENCODING = "UTF-8"; - DEFAULT_STANDALONE = "no"; - START = '<\?xml'; - STOP = '\?>'; + DEFAULT_VERSION = "1.0" + DEFAULT_ENCODING = "UTF-8" + DEFAULT_STANDALONE = "no" + START = "<?xml" + STOP = "?>" attr_accessor :version, :standalone attr_reader :writeencoding, :writethis @@ -25,6 +26,7 @@ module REXML self.encoding = version.encoding @writeencoding = version.writeencoding @standalone = version.standalone + @writethis = version.writethis else super() @version = version @@ -46,9 +48,9 @@ module REXML # Ignored def write(writer, indent=-1, transitive=false, ie_hack=false) return nil unless @writethis or writer.kind_of? Output - writer << START.sub(/\\/u, '') + writer << START writer << " #{content encoding}" - writer << STOP.sub(/\\/u, '') + writer << STOP end def ==( other ) @@ -102,14 +104,26 @@ module REXML end def inspect - START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '') + "#{START} ... 
#{STOP}" end private def content(enc) - rv = "version='#@version'" - rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i - rv << " standalone='#@standalone'" if @standalone + context = nil + context = parent.context if parent + if context and context[:prologue_quote] == :quote + quote = "\"" + else + quote = "'" + end + + rv = "version=#{quote}#{@version}#{quote}" + if @writeencoding or enc !~ /\Autf-8\z/i + rv << " encoding=#{quote}#{enc}#{quote}" + end + if @standalone + rv << " standalone=#{quote}#{@standalone}#{quote}" + end rv end end diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb index f1cb99b..a0921bd 100644 --- a/lib/rexml/xpath.rb +++ b/lib/rexml/xpath.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/functions' -require 'rexml/xpath_parser' +require_relative 'functions' +require_relative 'xpath_parser' module REXML # Wrapper class. Use this class to access the XPath functions. @@ -28,10 +28,10 @@ module REXML # XPath.first( doc, "//b"} ) # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } ) # XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) - def XPath::first element, path=nil, namespaces=nil, variables={} + def XPath::first(element, path=nil, namespaces=nil, variables={}, options={}) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash) - parser = XPathParser.new + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path @@ -57,10 +57,10 @@ module REXML # XPath.each( node, 'ancestor::x' ) { |el| ... } # XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \ # {|el| ... 
} - def XPath::each element, path=nil, namespaces=nil, variables={}, &block + def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash) - parser = XPathParser.new + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path @@ -69,8 +69,8 @@ module REXML end # Returns an array of nodes matching a given XPath. - def XPath::match element, path=nil, namespaces=nil, variables={} - parser = XPathParser.new + def XPath::match(element, path=nil, namespaces=nil, variables={}, options={}) + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 181b2b6..5eb1e5a 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -1,43 +1,51 @@ # frozen_string_literal: false -require 'rexml/namespace' -require 'rexml/xmltokens' -require 'rexml/attribute' -require 'rexml/syncenumerator' -require 'rexml/parsers/xpathparser' - -class Object - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone - clone - end -end -class Symbol - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Integer - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Float - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Array - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object+ types - 
def dclone - klone = self.clone - klone.clear - self.each{|v| klone << v.dclone} - klone + +require "pp" + +require_relative 'namespace' +require_relative 'xmltokens' +require_relative 'attribute' +require_relative 'parsers/xpathparser' + +module REXML + module DClonable + refine Object do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone + clone + end + end + refine Symbol do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Integer do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Float do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Array do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object+ types + def dclone + klone = self.clone + klone.clear + self.each{|v| klone << v.dclone} + klone + end + end end end +using REXML::DClonable + module REXML # You don't want to use this class. Really. Use XPath, which is a wrapper # for this class. Believe me. You don't want to poke around in here. 
@@ -47,10 +55,15 @@ module REXML include XMLTokens LITERAL = /^'([^']*)'|^"([^"]*)"/u - def initialize( ) + DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true") + + def initialize(strict: false) + @debug = DEBUG @parser = REXML::Parsers::XPathParser.new @namespaces = nil @variables = {} + @nest = 0 + @strict = strict end def namespaces=( namespaces={} ) @@ -75,7 +88,7 @@ module REXML def predicate path, nodeset path_stack = @parser.parse( path ) - expr( path_stack, nodeset ) + match( path_stack, nodeset ) end def []=( variable_name, value ) @@ -123,13 +136,24 @@ module REXML end - def match( path_stack, nodeset ) - r = expr( path_stack, nodeset ) - r + def match(path_stack, nodeset) + nodeset = nodeset.collect.with_index do |node, i| + position = i + 1 + XPathNode.new(node, position: position) + end + result = expr(path_stack, nodeset) + case result + when Array # nodeset + unnode(result) + else + [result] + end end private - + def strict? + @strict + end # Returns a String namespace for a node, given a prefix # The rules are: @@ -148,343 +172,481 @@ module REXML # Expr takes a stack of path elements and a set of nodes (either a Parent # or an Array and returns an Array of matching nodes - ALL = [ :attribute, :element, :text, :processing_instruction, :comment ] - ELEMENTS = [ :element ] def expr( path_stack, nodeset, context=nil ) - node_types = ELEMENTS + enter(:expr, path_stack, nodeset) if @debug return nodeset if path_stack.length == 0 || nodeset.length == 0 while path_stack.length > 0 + trace(:while, path_stack, nodeset) if @debug if nodeset.length == 0 path_stack.clear return [] end - case (op = path_stack.shift) + op = path_stack.shift + case op when :document - nodeset = [ nodeset[0].root_node ] - - when :qname - prefix = path_stack.shift - name = path_stack.shift - nodeset.delete_if do |node| - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace( node, prefix ) - if node.node_type == :element - if node.name == name - end - end - 
!(node.node_type == :element and - node.name == name and - node.namespace == ns ) - end - node_types = ELEMENTS - - when :any - nodeset.delete_if { |node| !node_types.include?(node.node_type) } - + first_raw_node = nodeset.first.raw_node + nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)] when :self - # This space left intentionally blank - - when :processing_instruction - target = path_stack.shift - nodeset.delete_if do |node| - (node.node_type != :processing_instruction) or - ( target!='' and ( node.target != target ) ) + nodeset = step(path_stack) do + [nodeset] end - - when :text - nodeset.delete_if { |node| node.node_type != :text } - - when :comment - nodeset.delete_if { |node| node.node_type != :comment } - - when :node - # This space left intentionally blank - node_types = ALL - when :child - new_nodeset = [] - nt = nil - nodeset.each do |node| - nt = node.node_type - new_nodeset += node.children if nt == :element or nt == :document + nodeset = step(path_stack) do + child(nodeset) end - nodeset = new_nodeset - node_types = ELEMENTS - when :literal + trace(:literal, path_stack, nodeset) if @debug return path_stack.shift - when :attribute - new_nodeset = [] - case path_stack.shift - when :qname - prefix = path_stack.shift - name = path_stack.shift - for element in nodeset - if element.node_type == :element - attrib = element.attribute( name, get_namespace(element, prefix) ) - new_nodeset << attrib if attrib + nodeset = step(path_stack, any_type: :attribute) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.node_type == :element + attributes = raw_node.attributes + next if attributes.empty? 
+ nodesets << attributes.each_attribute.collect.with_index do |attribute, i| + XPathNode.new(attribute, position: i + 1) end end - when :any - for element in nodeset - if element.node_type == :element - new_nodeset += element.attributes.to_a + nodesets + end + when :namespace + pre_defined_namespaces = { + "xml" => "http://www.w3.org/XML/1998/namespace", + } + nodeset = step(path_stack, any_type: :namespace) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + if @namespaces + nodesets << pre_defined_namespaces.merge(@namespaces) + else + nodesets << pre_defined_namespaces.merge(raw_node.namespaces) + end + when :attribute + if @namespaces + nodesets << pre_defined_namespaces.merge(@namespaces) + else + nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces) + end end end + nodesets end - nodeset = new_nodeset - when :parent - nodeset = nodeset.collect{|n| n.parent}.compact - #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact) - node_types = ELEMENTS - - when :ancestor - new_nodeset = [] - nodeset.each do |node| - while node.parent - node = node.parent - new_nodeset << node unless new_nodeset.include? node + nodeset = step(path_stack) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + if raw_node.node_type == :attribute + parent = raw_node.element + else + parent = raw_node.parent + end + nodesets << [XPathNode.new(parent, position: 1)] if parent end + nodesets end - nodeset = new_nodeset - node_types = ELEMENTS - - when :ancestor_or_self - new_nodeset = [] - nodeset.each do |node| - if node.node_type == :element - new_nodeset << node - while ( node.parent ) - node = node.parent - new_nodeset << node unless new_nodeset.include? 
node + when :ancestor + nodeset = step(path_stack) do + nodesets = [] + # new_nodes = {} + nodeset.each do |node| + raw_node = node.raw_node + new_nodeset = [] + while raw_node.parent + raw_node = raw_node.parent + # next if new_nodes.key?(node) + new_nodeset << XPathNode.new(raw_node, + position: new_nodeset.size + 1) + # new_nodes[node] = true end + nodesets << new_nodeset unless new_nodeset.empty? end + nodesets end - nodeset = new_nodeset - node_types = ELEMENTS - - when :predicate - new_nodeset = [] - subcontext = { :size => nodeset.size } - pred = path_stack.shift - nodeset.each_with_index { |node, index| - subcontext[ :node ] = node - subcontext[ :index ] = index+1 - pc = pred.dclone - result = expr( pc, [node], subcontext ) - result = result[0] if result.kind_of? Array and result.length == 1 - if result.kind_of? Numeric - new_nodeset << node if result == (index+1) - elsif result.instance_of? Array - if result.size > 0 and result.inject(false) {|k,s| s or k} - new_nodeset << node if result.size > 0 + when :ancestor_or_self + nodeset = step(path_stack) do + nodesets = [] + # new_nodes = {} + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.node_type == :element + new_nodeset = [XPathNode.new(raw_node, position: 1)] + # new_nodes[node] = true + while raw_node.parent + raw_node = raw_node.parent + # next if new_nodes.key?(node) + new_nodeset << XPathNode.new(raw_node, + position: new_nodeset.size + 1) + # new_nodes[node] = true end - else - new_nodeset << node if result + nodesets << new_nodeset unless new_nodeset.empty? end - } - nodeset = new_nodeset -=begin - predicate = path_stack.shift - ns = nodeset.clone - result = expr( predicate, ns ) - if result.kind_of? Array - nodeset = result.zip(ns).collect{|m,n| n if m}.compact - else - nodeset = result ? 
nodeset : [] + nodesets end -=end - when :descendant_or_self - rv = descendant_or_self( path_stack, nodeset ) - path_stack.clear - nodeset = rv - node_types = ELEMENTS - + nodeset = step(path_stack) do + descendant(nodeset, true) + end when :descendant - results = [] - nt = nil - nodeset.each do |node| - nt = node.node_type - results += expr( path_stack.dclone.unshift( :descendant_or_self ), - node.children ) if nt == :element or nt == :document + nodeset = step(path_stack) do + descendant(nodeset, false) end - nodeset = results - node_types = ELEMENTS - when :following_sibling - results = [] - nodeset.each do |node| - next if node.parent.nil? - all_siblings = node.parent.children - current_index = all_siblings.index( node ) - following_siblings = all_siblings[ current_index+1 .. -1 ] - results += expr( path_stack.dclone, following_siblings ) + nodeset = step(path_stack) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.respond_to?(:parent) + next if raw_node.parent.nil? + all_siblings = raw_node.parent.children + current_index = all_siblings.index(raw_node) + following_siblings = all_siblings[(current_index + 1)..-1] + next if following_siblings.empty? + nodesets << following_siblings.collect.with_index do |sibling, i| + XPathNode.new(sibling, position: i + 1) + end + end + nodesets end - nodeset = results - when :preceding_sibling - results = [] - nodeset.each do |node| - next if node.parent.nil? - all_siblings = node.parent.children - current_index = all_siblings.index( node ) - preceding_siblings = all_siblings[ 0, current_index ].reverse - results += preceding_siblings + nodeset = step(path_stack, order: :reverse) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.respond_to?(:parent) + next if raw_node.parent.nil? 
+ all_siblings = raw_node.parent.children + current_index = all_siblings.index(raw_node) + preceding_siblings = all_siblings[0, current_index].reverse + next if preceding_siblings.empty? + nodesets << preceding_siblings.collect.with_index do |sibling, i| + XPathNode.new(sibling, position: i + 1) + end + end + nodesets end - nodeset = results - node_types = ELEMENTS - when :preceding - new_nodeset = [] - nodeset.each do |node| - new_nodeset += preceding( node ) + nodeset = step(path_stack, order: :reverse) do + unnode(nodeset) do |node| + preceding(node) + end end - nodeset = new_nodeset - node_types = ELEMENTS - when :following - new_nodeset = [] - nodeset.each do |node| - new_nodeset += following( node ) - end - nodeset = new_nodeset - node_types = ELEMENTS - - when :namespace - new_nodeset = [] - prefix = path_stack.shift - nodeset.each do |node| - if (node.node_type == :element or node.node_type == :attribute) - if @namespaces - namespaces = @namespaces - elsif (node.node_type == :element) - namespaces = node.namespaces - else - namespaces = node.element.namesapces - end - if (node.namespace == namespaces[prefix]) - new_nodeset << node - end + nodeset = step(path_stack) do + unnode(nodeset) do |node| + following(node) end end - nodeset = new_nodeset - when :variable var_name = path_stack.shift - return @variables[ var_name ] + return [@variables[var_name]] - # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq - # TODO: Special case for :or and :and -- not evaluate the right - # operand if the left alone determines result (i.e. is true for - # :or and false for :and). 
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :or + when :eq, :neq, :lt, :lteq, :gt, :gteq left = expr( path_stack.shift, nodeset.dup, context ) right = expr( path_stack.shift, nodeset.dup, context ) res = equality_relational_compare( left, op, right ) + trace(op, left, right, res) if @debug return res + when :or + left = expr(path_stack.shift, nodeset.dup, context) + return true if Functions.boolean(left) + right = expr(path_stack.shift, nodeset.dup, context) + return Functions.boolean(right) + when :and - left = expr( path_stack.shift, nodeset.dup, context ) - return [] unless left - if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b} - return [] + left = expr(path_stack.shift, nodeset.dup, context) + return false unless Functions.boolean(left) + right = expr(path_stack.shift, nodeset.dup, context) + return Functions.boolean(right) + + when :div, :mod, :mult, :plus, :minus + left = expr(path_stack.shift, nodeset, context) + right = expr(path_stack.shift, nodeset, context) + left = unnode(left) if left.is_a?(Array) + right = unnode(right) if right.is_a?(Array) + left = Functions::number(left) + right = Functions::number(right) + case op + when :div + return left / right + when :mod + return left % right + when :mult + return left * right + when :plus + return left + right + when :minus + return left - right + else + raise "[BUG] Unexpected operator: <#{op.inspect}>" end - right = expr( path_stack.shift, nodeset.dup, context ) - res = equality_relational_compare( left, op, right ) - return res - - when :div - left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f - return (left / right) - - when :mod - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left % right) - - when :mult - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - 
right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left * right) - - when :plus - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left + right) - - when :minus - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left - right) - when :union left = expr( path_stack.shift, nodeset, context ) right = expr( path_stack.shift, nodeset, context ) + left = unnode(left) if left.is_a?(Array) + right = unnode(right) if right.is_a?(Array) return (left | right) - when :neg res = expr( path_stack, nodeset, context ) - return -(res.to_f) - + res = unnode(res) if res.is_a?(Array) + return -Functions.number(res) when :not when :function func_name = path_stack.shift.tr('-','_') arguments = path_stack.shift - subcontext = context ? nil : { :size => nodeset.size } - - res = [] - cont = context - nodeset.each_with_index { |n, i| - if subcontext - subcontext[:node] = n - subcontext[:index] = i - cont = subcontext + + if nodeset.size != 1 + message = "[BUG] Node set size must be 1 for function call: " + message += "<#{func_name}>: <#{nodeset.inspect}>: " + message += "<#{arguments.inspect}>" + raise message + end + + node = nodeset.first + if context + target_context = context + else + target_context = {:size => nodeset.size} + if node.is_a?(XPathNode) + target_context[:node] = node.raw_node + target_context[:index] = node.position + else + target_context[:node] = node + target_context[:index] = 1 end - arg_clone = arguments.dclone - args = arg_clone.collect { |arg| - expr( arg, [n], cont ) - } - Functions.context = cont - res << Functions.send( func_name, *args ) - } - return res + end + args = arguments.dclone.collect do |arg| + result = expr(arg, nodeset, target_context) + result = unnode(result) if result.is_a?(Array) + result + end + 
Functions.context = target_context + return Functions.send(func_name, *args) + else + raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>" end end # while return nodeset + ensure + leave(:expr, path_stack, nodeset) if @debug + end + + def step(path_stack, any_type: :element, order: :forward) + nodesets = yield + begin + enter(:step, path_stack, nodesets) if @debug + nodesets = node_test(path_stack, nodesets, any_type: any_type) + while path_stack[0] == :predicate + path_stack.shift # :predicate + predicate_expression = path_stack.shift.dclone + nodesets = evaluate_predicate(predicate_expression, nodesets) + end + if nodesets.size == 1 + ordered_nodeset = nodesets[0] + else + raw_nodes = [] + nodesets.each do |nodeset| + nodeset.each do |node| + if node.respond_to?(:raw_node) + raw_nodes << node.raw_node + else + raw_nodes << node + end + end + end + ordered_nodeset = sort(raw_nodes, order) + end + new_nodeset = [] + ordered_nodeset.each do |node| + # TODO: Remove duplicated + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + end + new_nodeset + ensure + leave(:step, path_stack, new_nodeset) if @debug + end end + def node_test(path_stack, nodesets, any_type: :element) + enter(:node_test, path_stack, nodesets) if @debug + operator = path_stack.shift + case operator + when :qname + prefix = path_stack.shift + name = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + if strict? 
+ raw_node.name == name and raw_node.namespace == "" + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node, prefix) + raw_node.name == name and raw_node.namespace == ns + end + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node, prefix) + raw_node.name == name and raw_node.namespace == ns + end + when :attribute + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + raw_node.name == name and raw_node.namespace == "" + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node.element, prefix) + raw_node.name == name and raw_node.namespace == ns + end + else + false + end + end + end + when :namespace + prefix = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + namespaces = @namespaces || raw_node.namespaces + raw_node.namespace == namespaces[prefix] + when :attribute + namespaces = @namespaces || raw_node.element.namespaces + raw_node.namespace == namespaces[prefix] + else + false + end + end + end + when :any + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == any_type + end + end + when :comment + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == :comment + end + end + when :text + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == :text + end + end + when :processing_instruction + target = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + (raw_node.node_type == :processing_instruction) and + (target.empty? 
or (raw_node.target == target)) + end + end + when :node + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + true + end + end + else + message = "[BUG] Unexpected node test: " + + "<#{operator.inspect}>: <#{path_stack.inspect}>" + raise message + end + new_nodesets + ensure + leave(:node_test, path_stack, new_nodesets) if @debug + end - ########################################################## - # FIXME - # The next two methods are BAD MOJO! - # This is my achilles heel. If anybody thinks of a better - # way of doing this, be my guest. This really sucks, but - # it is a wonder it works at all. - # ######################################################## + def filter_nodeset(nodeset) + new_nodeset = [] + nodeset.each do |node| + next unless yield(node) + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + end + new_nodeset + end - def descendant_or_self( path_stack, nodeset ) - rs = [] - d_o_s( path_stack, nodeset, rs ) - document_order(rs.flatten.compact) - #rs.flatten.compact + def evaluate_predicate(expression, nodesets) + enter(:predicate, expression, nodesets) if @debug + new_nodeset_count = 0 + new_nodesets = nodesets.collect do |nodeset| + new_nodeset = [] + subcontext = { :size => nodeset.size } + nodeset.each_with_index do |node, index| + if node.is_a?(XPathNode) + subcontext[:node] = node.raw_node + subcontext[:index] = node.position + else + subcontext[:node] = node + subcontext[:index] = index + 1 + end + result = expr(expression.dclone, [node], subcontext) + trace(:predicate_evaluate, expression, node, subcontext, result) if @debug + result = result[0] if result.kind_of? Array and result.length == 1 + if result.kind_of? Numeric + if result == node.position + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + elsif result.instance_of? 
Array + if result.size > 0 and result.inject(false) {|k,s| s or k} + if result.size > 0 + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + end + else + if result + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + end + end + new_nodeset + end + new_nodesets + ensure + leave(:predicate, new_nodesets) if @debug end - def d_o_s( p, ns, r ) - nt = nil - ns.each_index do |i| - n = ns[i] - x = expr( p.dclone, [ n ] ) - nt = n.node_type - d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0 - r.concat(x) if x.size > 0 + def trace(*args) + indent = " " * @nest + PP.pp(args, "").each_line do |line| + puts("#{indent}#{line}") end end + def enter(tag, *args) + trace(:enter, tag, *args) + @nest += 1 + end + + def leave(tag, *args) + @nest -= 1 + trace(:leave, tag, *args) + end # Reorders an array of nodes so that they are in document order # It tries to do this efficiently. @@ -494,7 +656,7 @@ module REXML # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. 
- def document_order( array_of_nodes ) + def sort(array_of_nodes, order) new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -505,42 +667,68 @@ module REXML end new_arry << [ node_idx.reverse, node ] } - new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] } + ordered = new_arry.sort_by do |index, node| + if order == :forward + index + else + -index + end + end + ordered.collect do |_index, node| + node + end end - - def recurse( nodeset, &block ) - for node in nodeset - yield node - recurse( node, &block ) if node.node_type == :element + def descendant(nodeset, include_self) + nodesets = [] + nodeset.each do |node| + new_nodeset = [] + new_nodes = {} + descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self) + nodesets << new_nodeset unless new_nodeset.empty? end + nodesets end + def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) + if include_self + return if new_nodes.key?(raw_node) + new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1) + new_nodes[raw_node] = true + end + node_type = raw_node.node_type + if node_type == :element or node_type == :document + raw_node.children.each do |child| + descendant_recursive(child, new_nodeset, new_nodes, true) + end + end + end # Builds a nodeset of all of the preceding nodes of the supplied node, # in reverse document order # preceding:: includes every element in the document that precedes this node, # except for ancestors - def preceding( node ) + def preceding(node) ancestors = [] - p = node.parent - while p - ancestors << p - p = p.parent + parent = node.parent + while parent + ancestors << parent + parent = parent.parent end - acc = [] - p = preceding_node_of( node ) - while p - if ancestors.include? 
p - ancestors.delete(p) + precedings = [] + preceding_node = preceding_node_of(node) + while preceding_node + if ancestors.include?(preceding_node) + ancestors.delete(preceding_node) else - acc << p + precedings << XPathNode.new(preceding_node, + position: precedings.size + 1) end - p = preceding_node_of( p ) + preceding_node = preceding_node_of(preceding_node) end - acc + precedings end def preceding_node_of( node ) @@ -558,14 +746,15 @@ module REXML psn end - def following( node ) - acc = [] - p = next_sibling_node( node ) - while p - acc << p - p = following_node_of( p ) + def following(node) + followings = [] + following_node = next_sibling_node(node) + while following_node + followings << XPathNode.new(following_node, + position: followings.size + 1) + following_node = following_node_of(following_node) end - acc + followings end def following_node_of( node ) @@ -587,45 +776,68 @@ module REXML return psn end + def child(nodeset) + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + node_type = raw_node.node_type + # trace(:child, node_type, node) + case node_type + when :element + nodesets << raw_node.children.collect.with_index do |child_node, i| + XPathNode.new(child_node, position: i + 1) + end + when :document + new_nodeset = [] + raw_node.children.each do |child| + case child + when XMLDecl, Text + # Ignore + else + new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1) + end + end + nodesets << new_nodeset unless new_nodeset.empty? + end + end + nodesets + end + def norm b case b when true, false return b when 'true', 'false' return Functions::boolean( b ) - when /^\d+(\.\d+)?$/ + when /^\d+(\.\d+)?$/, Numeric return Functions::number( b ) else return Functions::string( b ) end end - def equality_relational_compare( set1, op, set2 ) + def equality_relational_compare(set1, op, set2) + set1 = unnode(set1) if set1.is_a?(Array) + set2 = unnode(set2) if set2.is_a?(Array) + if set1.kind_of? Array and set2.kind_of? 
Array - if set1.size == 1 and set2.size == 1 - set1 = set1[0] - set2 = set2[0] - elsif set1.size == 0 or set2.size == 0 - nd = set1.size==0 ? set2 : set1 - rv = nd.collect { |il| compare( il, op, nil ) } - return rv - else - res = [] - SyncEnumerator.new( set1, set2 ).each { |i1, i2| - i1 = norm( i1 ) - i2 = norm( i2 ) - res << compare( i1, op, i2 ) - } - return res + # If both objects to be compared are node-sets, then the + # comparison will be true if and only if there is a node in the + # first node-set and a node in the second node-set such that the + # result of performing the comparison on the string-values of + # the two nodes is true. + set1.product(set2).any? do |node1, node2| + node_string1 = Functions.string(node1) + node_string2 = Functions.string(node2) + compare(node_string1, op, node_string2) end - end - # If one is nodeset and other is number, compare number to each item - # in nodeset s.t. number op number(string(item)) - # If one is nodeset and other is string, compare string to each item - # in nodeset s.t. string op string(item) - # If one is nodeset and other is boolean, compare boolean to each item - # in nodeset s.t. boolean op boolean(item) - if set1.kind_of? Array or set2.kind_of? Array + elsif set1.kind_of? Array or set2.kind_of? Array + # If one is nodeset and other is number, compare number to each item + # in nodeset s.t. number op number(string(item)) + # If one is nodeset and other is string, compare string to each item + # in nodeset s.t. string op string(item) + # If one is nodeset and other is boolean, compare boolean to each item + # in nodeset s.t. boolean op boolean(item) if set1.kind_of? Array a = set1 b = set2 @@ -636,15 +848,23 @@ module REXML case b when true, false - return a.collect {|v| compare( Functions::boolean(v), op, b ) } + each_unnode(a).any? 
do |unnoded| + compare(Functions.boolean(unnoded), op, b) + end when Numeric - return a.collect {|v| compare( Functions::number(v), op, b )} - when /^\d+(\.\d+)?$/ - b = Functions::number( b ) - return a.collect {|v| compare( Functions::number(v), op, b )} + each_unnode(a).any? do |unnoded| + compare(Functions.number(unnoded), op, b) + end + when /\A\d+(\.\d+)?\z/ + b = Functions.number(b) + each_unnode(a).any? do |unnoded| + compare(Functions.number(unnoded), op, b) + end else - b = Functions::string( b ) - return a.collect { |v| compare( Functions::string(v), op, b ) } + b = Functions::string(b) + each_unnode(a).any? do |unnoded| + compare(Functions::string(unnoded), op, b) + end end else # If neither is nodeset, @@ -654,32 +874,52 @@ module REXML # Else, convert to string # Else # Convert both to numbers and compare - s1 = set1.to_s - s2 = set2.to_s - if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false' - set1 = Functions::boolean( set1 ) - set2 = Functions::boolean( set2 ) + compare(set1, op, set2) + end + end + + def value_type(value) + case value + when true, false + :boolean + when Numeric + :number + when String + :string + else + raise "[BUG] Unexpected value type: <#{value.inspect}>" + end + end + + def normalize_compare_values(a, operator, b) + a_type = value_type(a) + b_type = value_type(b) + case operator + when :eq, :neq + if a_type == :boolean or b_type == :boolean + a = Functions.boolean(a) unless a_type == :boolean + b = Functions.boolean(b) unless b_type == :boolean + elsif a_type == :number or b_type == :number + a = Functions.number(a) unless a_type == :number + b = Functions.number(b) unless b_type == :number else - if op == :eq or op == :neq - if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/ - set1 = Functions::number( s1 ) - set2 = Functions::number( s2 ) - else - set1 = Functions::string( set1 ) - set2 = Functions::string( set2 ) - end - else - set1 = Functions::number( set1 ) - set2 = Functions::number( set2 ) - end + a = 
Functions.string(a) unless a_type == :string + b = Functions.string(b) unless b_type == :string end - return compare( set1, op, set2 ) + when :lt, :lteq, :gt, :gteq + a = Functions.number(a) unless a_type == :number + b = Functions.number(b) unless b_type == :number + else + message = "[BUG] Unexpected compare operator: " + + "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>" + raise message end - return false + [a, b] end - def compare a, op, b - case op + def compare(a, operator, b) + a, b = normalize_compare_values(a, operator, b) + case operator when :eq a == b when :neq @@ -692,13 +932,47 @@ module REXML a > b when :gteq a >= b - when :and - a and b - when :or - a or b else - false + message = "[BUG] Unexpected compare operator: " + + "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>" + raise message + end + end + + def each_unnode(nodeset) + return to_enum(__method__, nodeset) unless block_given? + nodeset.each do |node| + if node.is_a?(XPathNode) + unnoded = node.raw_node + else + unnoded = node + end + yield(unnoded) + end + end + + def unnode(nodeset) + each_unnode(nodeset).collect do |unnoded| + unnoded = yield(unnoded) if block_given? + unnoded + end + end + end + + # @private + class XPathNode + attr_reader :raw_node, :context + def initialize(node, context=nil) + if node.is_a?(XPathNode) + @raw_node = node.raw_node + else + @raw_node = node end + @context = context || {} + end + + def position + @context[:position] end end end -- 2.27.0