7495 lines
253 KiB
Diff
7495 lines
253 KiB
Diff
From 20017eea807e8fa386aa5c79ae779004d8b366dd Mon Sep 17 00:00:00 2001
|
|
From: Sutou Kouhei <kou@clear-code.com>
|
|
Date: Tue, 25 Jun 2024 11:26:33 +0900
|
|
Subject: [PATCH] Add 3.3.1 entry
|
|
|
|
Backport from https://github.com/ruby/rexml/tree/v3.3.1/lib/rexml
|
|
|
|
---
|
|
lib/rexml/attlistdecl.rb | 4 +-
|
|
lib/rexml/attribute.rb | 54 +-
|
|
lib/rexml/cdata.rb | 2 +-
|
|
lib/rexml/child.rb | 2 +-
|
|
lib/rexml/comment.rb | 2 +-
|
|
lib/rexml/doctype.rb | 49 +-
|
|
lib/rexml/document.rb | 256 ++-
|
|
lib/rexml/dtd/attlistdecl.rb | 2 +-
|
|
lib/rexml/dtd/dtd.rb | 12 +-
|
|
lib/rexml/dtd/elementdecl.rb | 2 +-
|
|
lib/rexml/dtd/entitydecl.rb | 2 +-
|
|
lib/rexml/dtd/notationdecl.rb | 2 +-
|
|
lib/rexml/element.rb | 2297 +++++++++++++++++-----
|
|
lib/rexml/entity.rb | 48 +-
|
|
lib/rexml/formatters/default.rb | 12 +-
|
|
lib/rexml/formatters/pretty.rb | 6 +-
|
|
lib/rexml/formatters/transitive.rb | 2 +-
|
|
lib/rexml/functions.rb | 105 +-
|
|
lib/rexml/instruction.rb | 32 +-
|
|
lib/rexml/light/node.rb | 12 +-
|
|
lib/rexml/namespace.rb | 29 +-
|
|
lib/rexml/node.rb | 18 +-
|
|
lib/rexml/output.rb | 2 +-
|
|
lib/rexml/parent.rb | 2 +-
|
|
lib/rexml/parseexception.rb | 1 +
|
|
lib/rexml/parsers/baseparser.rb | 551 +++---
|
|
lib/rexml/parsers/lightparser.rb | 6 +-
|
|
lib/rexml/parsers/pullparser.rb | 6 +-
|
|
lib/rexml/parsers/sax2parser.rb | 8 +-
|
|
lib/rexml/parsers/streamparser.rb | 2 +-
|
|
lib/rexml/parsers/treeparser.rb | 27 +-
|
|
lib/rexml/parsers/ultralightparser.rb | 4 +-
|
|
lib/rexml/parsers/xpathparser.rb | 332 ++--
|
|
lib/rexml/quickpath.rb | 4 +-
|
|
lib/rexml/rexml.rb | 55 +-
|
|
lib/rexml/source.rb | 223 ++-
|
|
lib/rexml/syncenumerator.rb | 33 -
|
|
lib/rexml/text.rb | 76 +-
|
|
lib/rexml/undefinednamespaceexception.rb | 2 +-
|
|
lib/rexml/validation/relaxng.rb | 4 +-
|
|
lib/rexml/validation/validation.rb | 2 +-
|
|
lib/rexml/xmldecl.rb | 40 +-
|
|
lib/rexml/xpath.rb | 16 +-
|
|
lib/rexml/xpath_parser.rb | 1052 ++++++----
|
|
44 files changed, 3677 insertions(+), 1721 deletions(-)
|
|
delete mode 100644 lib/rexml/syncenumerator.rb
|
|
|
|
diff --git a/lib/rexml/attlistdecl.rb b/lib/rexml/attlistdecl.rb
|
|
index dc1d2ad..44a91d6 100644
|
|
--- a/lib/rexml/attlistdecl.rb
|
|
+++ b/lib/rexml/attlistdecl.rb
|
|
@@ -1,7 +1,7 @@
|
|
# frozen_string_literal: false
|
|
#vim:ts=2 sw=2 noexpandtab:
|
|
-require 'rexml/child'
|
|
-require 'rexml/source'
|
|
+require_relative 'child'
|
|
+require_relative 'source'
|
|
|
|
module REXML
|
|
# This class needs:
|
|
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
|
|
index ca5984e..11893a9 100644
|
|
--- a/lib/rexml/attribute.rb
|
|
+++ b/lib/rexml/attribute.rb
|
|
@@ -1,6 +1,6 @@
|
|
-# frozen_string_literal: false
|
|
-require "rexml/namespace"
|
|
-require 'rexml/text'
|
|
+# frozen_string_literal: true
|
|
+require_relative "namespace"
|
|
+require_relative 'text'
|
|
|
|
module REXML
|
|
# Defines an Element Attribute; IE, a attribute=value pair, as in:
|
|
@@ -13,9 +13,6 @@ module REXML
|
|
|
|
# The element to which this attribute belongs
|
|
attr_reader :element
|
|
- # The normalized value of this attribute. That is, the attribute with
|
|
- # entities intact.
|
|
- attr_writer :normalized
|
|
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
|
|
|
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
|
@@ -67,15 +64,11 @@ module REXML
|
|
# e.add_attribute( "nsa:a", "aval" )
|
|
# e.add_attribute( "b", "bval" )
|
|
# e.attributes.get_attribute( "a" ).prefix # -> "nsa"
|
|
- # e.attributes.get_attribute( "b" ).prefix # -> "elns"
|
|
+ # e.attributes.get_attribute( "b" ).prefix # -> ""
|
|
# a = Attribute.new( "x", "y" )
|
|
# a.prefix # -> ""
|
|
def prefix
|
|
- pf = super
|
|
- if pf == ""
|
|
- pf = @element.prefix if @element
|
|
- end
|
|
- pf
|
|
+ super
|
|
end
|
|
|
|
# Returns the namespace URL, if defined, or nil otherwise
|
|
@@ -86,9 +79,26 @@ module REXML
|
|
# e.add_attribute("nsx:a", "c")
|
|
# e.attribute("ns:a").namespace # => "http://url"
|
|
# e.attribute("nsx:a").namespace # => nil
|
|
+ #
|
|
+ # This method always returns "" for an attribute that has no namespace
|
|
+ # prefix, because the default namespace doesn't apply to attribute names.
|
|
+ #
|
|
+ # From https://www.w3.org/TR/xml-names/#uniqAttrs
|
|
+ #
|
|
+ # > the default namespace does not apply to attribute names
|
|
+ #
|
|
+ # e = REXML::Element.new("el")
|
|
+ # e.add_namespace("", "http://example.com/")
|
|
+ # e.namespace # => "http://example.com/"
|
|
+ # e.add_attribute("a", "b")
|
|
+ # e.attribute("a").namespace # => ""
|
|
def namespace arg=nil
|
|
arg = prefix if arg.nil?
|
|
- @element.namespace arg
|
|
+ if arg == ""
|
|
+ ""
|
|
+ else
|
|
+ @element.namespace(arg)
|
|
+ end
|
|
end
|
|
|
|
# Returns true if other is an Attribute and has the same name and value,
|
|
@@ -109,10 +119,13 @@ module REXML
|
|
# b = Attribute.new( "ns:x", "y" )
|
|
# b.to_string # -> "ns:x='y'"
|
|
def to_string
|
|
+ value = to_s
|
|
if @element and @element.context and @element.context[:attribute_quote] == :quote
|
|
- %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
|
|
+ value = value.gsub('"', '&quot;') if value.include?('"')
|
|
+ %Q^#@expanded_name="#{value}"^
|
|
else
|
|
- "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
|
|
+ value = value.gsub("'", '&apos;') if value.include?("'")
|
|
+ "#@expanded_name='#{value}'"
|
|
end
|
|
end
|
|
|
|
@@ -128,7 +141,6 @@ module REXML
|
|
return @normalized if @normalized
|
|
|
|
@normalized = Text::normalize( @unnormalized, doctype )
|
|
- @unnormalized = nil
|
|
@normalized
|
|
end
|
|
|
|
@@ -137,10 +149,16 @@ module REXML
|
|
def value
|
|
return @unnormalized if @unnormalized
|
|
@unnormalized = Text::unnormalize( @normalized, doctype )
|
|
- @normalized = nil
|
|
@unnormalized
|
|
end
|
|
|
|
+ # The normalized value of this attribute. That is, the attribute with
|
|
+ # entities intact.
|
|
+ def normalized=(new_normalized)
|
|
+ @normalized = new_normalized
|
|
+ @unnormalized = nil
|
|
+ end
|
|
+
|
|
# Returns a copy of this attribute
|
|
def clone
|
|
Attribute.new self
|
|
@@ -177,7 +195,7 @@ module REXML
|
|
end
|
|
|
|
def inspect
|
|
- rv = ""
|
|
+ rv = +""
|
|
write( rv )
|
|
rv
|
|
end
|
|
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb
|
|
index 2238446..997f5a0 100644
|
|
--- a/lib/rexml/cdata.rb
|
|
+++ b/lib/rexml/cdata.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/text"
|
|
+require_relative "text"
|
|
|
|
module REXML
|
|
class CData < Text
|
|
diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb
|
|
index d23451e..cc6e9a4 100644
|
|
--- a/lib/rexml/child.rb
|
|
+++ b/lib/rexml/child.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/node"
|
|
+require_relative "node"
|
|
|
|
module REXML
|
|
##
|
|
diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb
|
|
index 822fe0d..52c58b4 100644
|
|
--- a/lib/rexml/comment.rb
|
|
+++ b/lib/rexml/comment.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "child"
|
|
|
|
module REXML
|
|
##
|
|
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
|
|
index cb9bf57..f359048 100644
|
|
--- a/lib/rexml/doctype.rb
|
|
+++ b/lib/rexml/doctype.rb
|
|
@@ -1,20 +1,25 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/parent"
|
|
-require "rexml/parseexception"
|
|
-require "rexml/namespace"
|
|
-require 'rexml/entity'
|
|
-require 'rexml/attlistdecl'
|
|
-require 'rexml/xmltokens'
|
|
+require_relative "parent"
|
|
+require_relative "parseexception"
|
|
+require_relative "namespace"
|
|
+require_relative 'entity'
|
|
+require_relative 'attlistdecl'
|
|
+require_relative 'xmltokens'
|
|
|
|
module REXML
|
|
class ReferenceWriter
|
|
def initialize(id_type,
|
|
public_id_literal,
|
|
- system_literal)
|
|
+ system_literal,
|
|
+ context=nil)
|
|
@id_type = id_type
|
|
@public_id_literal = public_id_literal
|
|
@system_literal = system_literal
|
|
- @default_quote = "\""
|
|
+ if context and context[:prologue_quote] == :apostrophe
|
|
+ @default_quote = "'"
|
|
+ else
|
|
+ @default_quote = "\""
|
|
+ end
|
|
end
|
|
|
|
def write(output)
|
|
@@ -150,7 +155,8 @@ module REXML
|
|
if @external_id
|
|
reference_writer = ReferenceWriter.new(@external_id,
|
|
@long_name,
|
|
- @uri)
|
|
+ @uri,
|
|
+ context)
|
|
reference_writer.write(output)
|
|
end
|
|
unless @children.empty?
|
|
@@ -165,7 +171,11 @@ module REXML
|
|
end
|
|
|
|
def context
|
|
- @parent.context
|
|
+ if @parent
|
|
+ @parent.context
|
|
+ else
|
|
+ nil
|
|
+ end
|
|
end
|
|
|
|
def entity( name )
|
|
@@ -187,7 +197,7 @@ module REXML
|
|
when "SYSTEM"
|
|
nil
|
|
when "PUBLIC"
|
|
- strip_quotes(@long_name)
|
|
+ @long_name
|
|
end
|
|
end
|
|
|
|
@@ -197,9 +207,9 @@ module REXML
|
|
def system
|
|
case @external_id
|
|
when "SYSTEM"
|
|
- strip_quotes(@long_name)
|
|
+ @long_name
|
|
when "PUBLIC"
|
|
- @uri.kind_of?(String) ? strip_quotes(@uri) : nil
|
|
+ @uri.kind_of?(String) ? @uri : nil
|
|
end
|
|
end
|
|
|
|
@@ -221,15 +231,6 @@ module REXML
|
|
notation_decl.name == name
|
|
}
|
|
end
|
|
-
|
|
- private
|
|
-
|
|
- # Method contributed by Henrik Martensson
|
|
- def strip_quotes(quoted_string)
|
|
- quoted_string =~ /^[\'\"].*[\'\"]$/ ?
|
|
- quoted_string[1, quoted_string.length-2] :
|
|
- quoted_string
|
|
- end
|
|
end
|
|
|
|
# We don't really handle any of these since we're not a validating
|
|
@@ -287,8 +288,10 @@ module REXML
|
|
end
|
|
|
|
def to_s
|
|
+ context = nil
|
|
+ context = parent.context if parent
|
|
notation = "<!NOTATION #{@name}"
|
|
- reference_writer = ReferenceWriter.new(@middle, @public, @system)
|
|
+ reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
|
|
reference_writer.write(notation)
|
|
notation << ">"
|
|
notation
|
|
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
|
|
index 806bc49..b1caa02 100644
|
|
--- a/lib/rexml/document.rb
|
|
+++ b/lib/rexml/document.rb
|
|
@@ -1,38 +1,94 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/security"
|
|
-require "rexml/element"
|
|
-require "rexml/xmldecl"
|
|
-require "rexml/source"
|
|
-require "rexml/comment"
|
|
-require "rexml/doctype"
|
|
-require "rexml/instruction"
|
|
-require "rexml/rexml"
|
|
-require "rexml/parseexception"
|
|
-require "rexml/output"
|
|
-require "rexml/parsers/baseparser"
|
|
-require "rexml/parsers/streamparser"
|
|
-require "rexml/parsers/treeparser"
|
|
+require_relative "security"
|
|
+require_relative "element"
|
|
+require_relative "xmldecl"
|
|
+require_relative "source"
|
|
+require_relative "comment"
|
|
+require_relative "doctype"
|
|
+require_relative "instruction"
|
|
+require_relative "rexml"
|
|
+require_relative "parseexception"
|
|
+require_relative "output"
|
|
+require_relative "parsers/baseparser"
|
|
+require_relative "parsers/streamparser"
|
|
+require_relative "parsers/treeparser"
|
|
|
|
module REXML
|
|
- # Represents a full XML document, including PIs, a doctype, etc. A
|
|
- # Document has a single child that can be accessed by root().
|
|
- # Note that if you want to have an XML declaration written for a document
|
|
- # you create, you must add one; REXML documents do not write a default
|
|
- # declaration for you. See |DECLARATION| and |write|.
|
|
+ # Represents an XML document.
|
|
+ #
|
|
+ # A document may have:
|
|
+ #
|
|
+ # - A single child that may be accessed via method #root.
|
|
+ # - An XML declaration.
|
|
+ # - A document type.
|
|
+ # - Processing instructions.
|
|
+ #
|
|
+ # == In a Hurry?
|
|
+ #
|
|
+ # If you're somewhat familiar with XML
|
|
+ # and have a particular task in mind,
|
|
+ # you may want to see the
|
|
+ # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html],
|
|
+ # and in particular, the
|
|
+ # {tasks page for documents}[../doc/rexml/tasks/tocs/document_toc_rdoc.html].
|
|
+ #
|
|
class Document < Element
|
|
- # A convenient default XML declaration. If you want an XML declaration,
|
|
- # the easiest way to add one is mydoc << Document::DECLARATION
|
|
- # +DEPRECATED+
|
|
- # Use: mydoc << XMLDecl.default
|
|
+ # A convenient default XML declaration. Use:
|
|
+ #
|
|
+ # mydoc << XMLDecl.default
|
|
+ #
|
|
DECLARATION = XMLDecl.default
|
|
|
|
- # Constructor
|
|
- # @param source if supplied, must be a Document, String, or IO.
|
|
- # Documents have their context and Element attributes cloned.
|
|
- # Strings are expected to be valid XML documents. IOs are expected
|
|
- # to be sources of valid XML documents.
|
|
- # @param context if supplied, contains the context of the document;
|
|
- # this should be a Hash.
|
|
+ # :call-seq:
|
|
+ # new(string = nil, context = {}) -> new_document
|
|
+ # new(io_stream = nil, context = {}) -> new_document
|
|
+ # new(document = nil, context = {}) -> new_document
|
|
+ #
|
|
+ # Returns a new \REXML::Document object.
|
|
+ #
|
|
+ # When no arguments are given,
|
|
+ # returns an empty document:
|
|
+ #
|
|
+ # d = REXML::Document.new
|
|
+ # d.to_s # => ""
|
|
+ #
|
|
+ # When argument +string+ is given, it must be a string
|
|
+ # containing a valid XML document:
|
|
+ #
|
|
+ # xml_string = '<root><foo>Foo</foo><bar>Bar</bar></root>'
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
|
|
+ #
|
|
+ # When argument +io_stream+ is given, it must be an \IO object
|
|
+ # that is opened for reading, and when read must return a valid XML document:
|
|
+ #
|
|
+ # File.write('t.xml', xml_string)
|
|
+ # d = File.open('t.xml', 'r') do |io|
|
|
+ # REXML::Document.new(io)
|
|
+ # end
|
|
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
|
|
+ #
|
|
+ # When argument +document+ is given, it must be an existing
|
|
+ # document object, whose context and attributes (but not children)
|
|
+ # are cloned into the new document:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.children # => [<root> ... </>]
|
|
+ # d.context = {raw: :all, compress_whitespace: :all}
|
|
+ # d.add_attributes({'bar' => 0, 'baz' => 1})
|
|
+ # d1 = REXML::Document.new(d)
|
|
+ # d1.children # => []
|
|
+ # d1.context # => {:raw=>:all, :compress_whitespace=>:all}
|
|
+ # d1.attributes # => {"bar"=>bar='0', "baz"=>baz='1'}
|
|
+ #
|
|
+ # When argument +context+ is given, it must be a hash
|
|
+ # containing context entries for the document;
|
|
+ # see {Element Context}[../doc/rexml/context_rdoc.html]:
|
|
+ #
|
|
+ # context = {raw: :all, compress_whitespace: :all}
|
|
+ # d = REXML::Document.new(xml_string, context)
|
|
+ # d.context # => {:raw=>:all, :compress_whitespace=>:all}
|
|
+ #
|
|
def initialize( source = nil, context = {} )
|
|
@entity_expansion_count = 0
|
|
super()
|
|
@@ -46,26 +102,71 @@ module REXML
|
|
end
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # node_type -> :document
|
|
+ #
|
|
+ # Returns the symbol +:document+.
|
|
+ #
|
|
def node_type
|
|
:document
|
|
end
|
|
|
|
- # Should be obvious
|
|
+ # :call-seq:
|
|
+ # clone -> new_document
|
|
+ #
|
|
+ # Returns the new document resulting from executing
|
|
+ # <tt>Document.new(self)</tt>. See Document.new.
|
|
+ #
|
|
def clone
|
|
Document.new self
|
|
end
|
|
|
|
- # According to the XML spec, a root node has no expanded name
|
|
+ # :call-seq:
|
|
+ # expanded_name -> empty_string
|
|
+ #
|
|
+ # Returns an empty string.
|
|
+ #
|
|
def expanded_name
|
|
''
|
|
#d = doc_type
|
|
#d ? d.name : "UNDEFINED"
|
|
end
|
|
-
|
|
alias :name :expanded_name
|
|
|
|
- # We override this, because XMLDecls and DocTypes must go at the start
|
|
- # of the document
|
|
+ # :call-seq:
|
|
+ # add(xml_decl) -> self
|
|
+ # add(doc_type) -> self
|
|
+ # add(object) -> self
|
|
+ #
|
|
+ # Adds an object to the document; returns +self+.
|
|
+ #
|
|
+ # When argument +xml_decl+ is given,
|
|
+ # it must be an REXML::XMLDecl object,
|
|
+ # which becomes the XML declaration for the document,
|
|
+ # replacing the previous XML declaration if any:
|
|
+ #
|
|
+ # d = REXML::Document.new
|
|
+ # d.xml_decl.to_s # => ""
|
|
+ # d.add(REXML::XMLDecl.new('2.0'))
|
|
+ # d.xml_decl.to_s # => "<?xml version='2.0'?>"
|
|
+ #
|
|
+ # When argument +doc_type+ is given,
|
|
+ # it must be an REXML::DocType object,
|
|
+ # which becomes the document type for the document,
|
|
+ # replacing the previous document type, if any:
|
|
+ #
|
|
+ # d = REXML::Document.new
|
|
+ # d.doctype.to_s # => ""
|
|
+ # d.add(REXML::DocType.new('foo'))
|
|
+ # d.doctype.to_s # => "<!DOCTYPE foo>"
|
|
+ #
|
|
+ # When argument +object+ (not an REXML::XMLDecl or REXML::DocType object)
|
|
+ # is given it is added as the last child:
|
|
+ #
|
|
+ # d = REXML::Document.new
|
|
+ # d.add(REXML::Element.new('foo'))
|
|
+ # d.to_s # => "<foo/>"
|
|
+ #
|
|
def add( child )
|
|
if child.kind_of? XMLDecl
|
|
if @children[0].kind_of? XMLDecl
|
|
@@ -99,49 +200,108 @@ module REXML
|
|
end
|
|
alias :<< :add
|
|
|
|
+ # :call-seq:
|
|
+ # add_element(name_or_element = nil, attributes = nil) -> new_element
|
|
+ #
|
|
+ # Adds an element to the document by calling REXML::Element.add_element:
|
|
+ #
|
|
+ # REXML::Element.add_element(name_or_element, attributes)
|
|
def add_element(arg=nil, arg2=nil)
|
|
rv = super
|
|
raise "attempted adding second root element to document" if @elements.size > 1
|
|
rv
|
|
end
|
|
|
|
- # @return the root Element of the document, or nil if this document
|
|
- # has no children.
|
|
+ # :call-seq:
|
|
+ # root -> root_element or nil
|
|
+ #
|
|
+ # Returns the root element of the document, if it exists, otherwise +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new('<root></root>')
|
|
+ # d.root # => <root/>
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.root # => nil
|
|
+ #
|
|
def root
|
|
elements[1]
|
|
#self
|
|
#@children.find { |item| item.kind_of? Element }
|
|
end
|
|
|
|
- # @return the DocType child of the document, if one exists,
|
|
- # and nil otherwise.
|
|
+ # :call-seq:
|
|
+ # doctype -> doc_type or nil
|
|
+ #
|
|
+ # Returns the DocType object for the document, if it exists, otherwise +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new('<!DOCTYPE document SYSTEM "subjects.dtd">')
|
|
+ # d.doctype.class # => REXML::DocType
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.doctype.class # => NilClass
|
|
+ #
|
|
def doctype
|
|
@children.find { |item| item.kind_of? DocType }
|
|
end
|
|
|
|
- # @return the XMLDecl of this document; if no XMLDecl has been
|
|
- # set, the default declaration is returned.
|
|
+ # :call-seq:
|
|
+ # xml_decl -> xml_decl
|
|
+ #
|
|
+ # Returns the XMLDecl object for the document, if it exists,
|
|
+ # otherwise the default XMLDecl object:
|
|
+ #
|
|
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-8"?>')
|
|
+ # d.xml_decl.class # => REXML::XMLDecl
|
|
+ # d.xml_decl.to_s # => "<?xml version='1.0' encoding='UTF-8'?>"
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.xml_decl.class # => REXML::XMLDecl
|
|
+ # d.xml_decl.to_s # => ""
|
|
+ #
|
|
def xml_decl
|
|
rv = @children[0]
|
|
return rv if rv.kind_of? XMLDecl
|
|
@children.unshift(XMLDecl.default)[0]
|
|
end
|
|
|
|
- # @return the XMLDecl version of this document as a String.
|
|
- # If no XMLDecl has been set, returns the default version.
|
|
+ # :call-seq:
|
|
+ # version -> version_string
|
|
+ #
|
|
+ # Returns the XMLDecl version of this document as a string,
|
|
+ # if it has been set, otherwise the default version:
|
|
+ #
|
|
+ # d = REXML::Document.new('<?xml version="2.0" encoding="UTF-8"?>')
|
|
+ # d.version # => "2.0"
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.version # => "1.0"
|
|
+ #
|
|
def version
|
|
xml_decl().version
|
|
end
|
|
|
|
- # @return the XMLDecl encoding of this document as an
|
|
- # Encoding object.
|
|
- # If no XMLDecl has been set, returns the default encoding.
|
|
+ # :call-seq:
|
|
+ # encoding -> encoding_string
|
|
+ #
|
|
+ # Returns the XMLDecl encoding of the document,
|
|
+ # if it has been set, otherwise the default encoding:
|
|
+ #
|
|
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-16"?>')
|
|
+ # d.encoding # => "UTF-16"
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.encoding # => "UTF-8"
|
|
+ #
|
|
def encoding
|
|
xml_decl().encoding
|
|
end
|
|
|
|
- # @return the XMLDecl standalone value of this document as a String.
|
|
- # If no XMLDecl has been set, returns the default setting.
|
|
+ # :call-seq:
|
|
+ # stand_alone?
|
|
+ #
|
|
+ # Returns the XMLDecl standalone value of the document as a string,
|
|
+ # if it has been set, otherwise the default standalone value:
|
|
+ #
|
|
+ # d = REXML::Document.new('<?xml standalone="yes"?>')
|
|
+ # d.stand_alone? # => "yes"
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.stand_alone? # => nil
|
|
+ #
|
|
def stand_alone?
|
|
xml_decl().stand_alone?
|
|
end
|
|
@@ -226,7 +386,7 @@ module REXML
|
|
end
|
|
formatter = if indent > -1
|
|
if transitive
|
|
- require "rexml/formatters/transitive"
|
|
+ require_relative "formatters/transitive"
|
|
REXML::Formatters::Transitive.new( indent, ie_hack )
|
|
else
|
|
REXML::Formatters::Pretty.new( indent, ie_hack )
|
|
diff --git a/lib/rexml/dtd/attlistdecl.rb b/lib/rexml/dtd/attlistdecl.rb
|
|
index 32847da..1326cb2 100644
|
|
--- a/lib/rexml/dtd/attlistdecl.rb
|
|
+++ b/lib/rexml/dtd/attlistdecl.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "../child"
|
|
module REXML
|
|
module DTD
|
|
class AttlistDecl < Child
|
|
diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb
|
|
index 927d5d8..8b0f2d7 100644
|
|
--- a/lib/rexml/dtd/dtd.rb
|
|
+++ b/lib/rexml/dtd/dtd.rb
|
|
@@ -1,10 +1,10 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/dtd/elementdecl"
|
|
-require "rexml/dtd/entitydecl"
|
|
-require "rexml/comment"
|
|
-require "rexml/dtd/notationdecl"
|
|
-require "rexml/dtd/attlistdecl"
|
|
-require "rexml/parent"
|
|
+require_relative "elementdecl"
|
|
+require_relative "entitydecl"
|
|
+require_relative "../comment"
|
|
+require_relative "notationdecl"
|
|
+require_relative "attlistdecl"
|
|
+require_relative "../parent"
|
|
|
|
module REXML
|
|
module DTD
|
|
diff --git a/lib/rexml/dtd/elementdecl.rb b/lib/rexml/dtd/elementdecl.rb
|
|
index 119fd41..20ed023 100644
|
|
--- a/lib/rexml/dtd/elementdecl.rb
|
|
+++ b/lib/rexml/dtd/elementdecl.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "../child"
|
|
module REXML
|
|
module DTD
|
|
class ElementDecl < Child
|
|
diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb
|
|
index 45707e2..312df65 100644
|
|
--- a/lib/rexml/dtd/entitydecl.rb
|
|
+++ b/lib/rexml/dtd/entitydecl.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "../child"
|
|
module REXML
|
|
module DTD
|
|
class EntityDecl < Child
|
|
diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb
|
|
index cfdf0b9..04a9b08 100644
|
|
--- a/lib/rexml/dtd/notationdecl.rb
|
|
+++ b/lib/rexml/dtd/notationdecl.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "../child"
|
|
module REXML
|
|
module DTD
|
|
class NotationDecl < Child
|
|
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
|
|
index ac9b108..a5808d7 100644
|
|
--- a/lib/rexml/element.rb
|
|
+++ b/lib/rexml/element.rb
|
|
@@ -1,23 +1,273 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/parent"
|
|
-require "rexml/namespace"
|
|
-require "rexml/attribute"
|
|
-require "rexml/cdata"
|
|
-require "rexml/xpath"
|
|
-require "rexml/parseexception"
|
|
+require_relative "parent"
|
|
+require_relative "namespace"
|
|
+require_relative "attribute"
|
|
+require_relative "cdata"
|
|
+require_relative "xpath"
|
|
+require_relative "parseexception"
|
|
|
|
module REXML
|
|
- # An implementation note about namespaces:
|
|
- # As we parse, when we find namespaces we put them in a hash and assign
|
|
- # them a unique ID. We then convert the namespace prefix for the node
|
|
- # to the unique ID. This makes namespace lookup much faster for the
|
|
- # cost of extra memory use. We save the namespace prefix for the
|
|
- # context node and convert it back when we write it.
|
|
- @@namespaces = {}
|
|
-
|
|
- # Represents a tagged XML element. Elements are characterized by
|
|
- # having children, attributes, and names, and can themselves be
|
|
- # children.
|
|
+ # An \REXML::Element object represents an XML element.
|
|
+ #
|
|
+ # An element:
|
|
+ #
|
|
+ # - Has a name (string).
|
|
+ # - May have a parent (another element).
|
|
+ # - Has zero or more children
|
|
+ # (other elements, text, CDATA, processing instructions, and comments).
|
|
+ # - Has zero or more siblings
|
|
+ # (other elements, text, CDATA, processing instructions, and comments).
|
|
+ # - Has zero or more named attributes.
|
|
+ #
|
|
+ # == In a Hurry?
|
|
+ #
|
|
+ # If you're somewhat familiar with XML
|
|
+ # and have a particular task in mind,
|
|
+ # you may want to see the
|
|
+ # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html],
|
|
+ # and in particular, the
|
|
+ # {tasks page for elements}[../doc/rexml/tasks/tocs/element_toc_rdoc.html].
|
|
+ #
|
|
+ # === Name
|
|
+ #
|
|
+ # An element has a name, which is initially set when the element is created:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.name # => "foo"
|
|
+ #
|
|
+ # The name may be changed:
|
|
+ #
|
|
+ # e.name = 'bar'
|
|
+ # e.name # => "bar"
|
|
+ #
|
|
+ #
|
|
+ # === \Parent
|
|
+ #
|
|
+ # An element may have a parent.
|
|
+ #
|
|
+ # Its parent may be assigned explicitly when the element is created:
|
|
+ #
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e1 = REXML::Element.new('bar', e0)
|
|
+ # e1.parent # => <foo> ... </>
|
|
+ #
|
|
+ # Note: the representation of an element always shows the element's name.
|
|
+ # If the element has children, the representation indicates that
|
|
+ # by including an ellipsis (<tt>...</tt>).
|
|
+ #
|
|
+ # The parent may be assigned explicitly at any time:
|
|
+ #
|
|
+ # e2 = REXML::Element.new('baz')
|
|
+ # e1.parent = e2
|
|
+ # e1.parent # => <baz/>
|
|
+ #
|
|
+ # When an element is added as a child, its parent is set automatically:
|
|
+ #
|
|
+ # e1.add_element(e0)
|
|
+ # e0.parent # => <bar> ... </>
|
|
+ #
|
|
+ # For an element that has no parent, method +parent+ returns +nil+.
|
|
+ #
|
|
+ # === Children
|
|
+ #
|
|
+ # An element has zero or more children.
|
|
+ # The children are an ordered collection
|
|
+ # of all objects whose parent is the element itself.
|
|
+ #
|
|
+ # The children may include any combination of elements, text, comments,
|
|
+ # processing instructions, and CDATA.
|
|
+ # (This example keeps things clean by controlling whitespace
|
|
+ # via a +context+ setting.)
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <ele_0/>
|
|
+ # text 0
|
|
+ # <!--comment 0-->
|
|
+ # <?target_0 pi_0?>
|
|
+ # <![CDATA[cdata 0]]>
|
|
+ # <ele_1/>
|
|
+ # text 1
|
|
+ # <!--comment 1-->
|
|
+ # <?target_0 pi_1?>
|
|
+ # <![CDATA[cdata 1]]>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # context = {ignore_whitespace_nodes: :all, compress_whitespace: :all}
|
|
+ # d = REXML::Document.new(xml_string, context)
|
|
+ # root = d.root
|
|
+ # root.children.size # => 10
|
|
+ # root.each {|child| p "#{child.class}: #{child}" }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # "REXML::Element: <ele_0/>"
|
|
+ # "REXML::Text: \n text 0\n "
|
|
+ # "REXML::Comment: comment 0"
|
|
+ # "REXML::Instruction: <?target_0 pi_0?>"
|
|
+ # "REXML::CData: cdata 0"
|
|
+ # "REXML::Element: <ele_1/>"
|
|
+ # "REXML::Text: \n text 1\n "
|
|
+ # "REXML::Comment: comment 1"
|
|
+ # "REXML::Instruction: <?target_0 pi_1?>"
|
|
+ # "REXML::CData: cdata 1"
|
|
+ #
|
|
+ # A child may be added using inherited methods
|
|
+ # Parent#insert_before or Parent#insert_after:
|
|
+ #
|
|
+ # xml_string = '<root><a/><c/><d/></root>'
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # root = d.root
|
|
+ # c = d.root[1] # => <c/>
|
|
+ # root.insert_before(c, REXML::Element.new('b'))
|
|
+ # root.to_a # => [<a/>, <b/>, <c/>, <d/>]
|
|
+ #
|
|
+ # A child may be replaced using Parent#replace_child:
|
|
+ #
|
|
+ # root.replace_child(c, REXML::Element.new('x'))
|
|
+ # root.to_a # => [<a/>, <b/>, <x/>, <d/>]
|
|
+ #
|
|
+ # A child may be removed using Parent#delete:
|
|
+ #
|
|
+ # x = root[2] # => <x/>
|
|
+ # root.delete(x)
|
|
+ # root.to_a # => [<a/>, <b/>, <d/>]
|
|
+ #
|
|
+ # === Siblings
|
|
+ #
|
|
+ # An element has zero or more siblings,
|
|
+ # which are the other children of the element's parent.
|
|
+ #
|
|
+ # In the example above, element +ele_1+ is between a CDATA sibling
|
|
+ # and a text sibling:
|
|
+ #
|
|
+ # ele_1 = root[5] # => <ele_1/>
|
|
+ # ele_1.previous_sibling # => "cdata 0"
|
|
+ # ele_1.next_sibling # => "\n text 1\n "
|
|
+ #
|
|
+ # === \Attributes
|
|
+ #
|
|
+ # An element has zero or more named attributes.
|
|
+ #
|
|
+ # A new element has no attributes:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.attributes # => {}
|
|
+ #
|
|
+ # Attributes may be added:
|
|
+ #
|
|
+ # e.add_attribute('bar', 'baz')
|
|
+ # e.add_attribute('bat', 'bam')
|
|
+ # e.attributes.size # => 2
|
|
+ # e['bar'] # => "baz"
|
|
+ # e['bat'] # => "bam"
|
|
+ #
|
|
+ # An existing attribute may be modified:
|
|
+ #
|
|
+ # e.add_attribute('bar', 'bad')
|
|
+ # e.attributes.size # => 2
|
|
+ # e['bar'] # => "bad"
|
|
+ #
|
|
+ # An existing attribute may be deleted:
|
|
+ #
|
|
+ # e.delete_attribute('bar')
|
|
+ # e.attributes.size # => 1
|
|
+ # e['bar'] # => nil
|
|
+ #
|
|
+ # == What's Here
|
|
+ #
|
|
+ # To begin with, what's elsewhere?
|
|
+ #
|
|
+ # \Class \REXML::Element inherits from its ancestor classes:
|
|
+ #
|
|
+ # - REXML::Child
|
|
+ # - REXML::Parent
|
|
+ #
|
|
+ # \REXML::Element itself and its ancestors also include modules:
|
|
+ #
|
|
+ # - {Enumerable}[https://docs.ruby-lang.org/en/master/Enumerable.html]
|
|
+ # - REXML::Namespace
|
|
+ # - REXML::Node
|
|
+ # - REXML::XMLTokens
|
|
+ #
|
|
+ # === Methods for Creating an \Element
|
|
+ #
|
|
+ # ::new:: Returns a new empty element.
|
|
+ # #clone:: Returns a clone of another element.
|
|
+ #
|
|
+ # === Methods for Attributes
|
|
+ #
|
|
+ # {[attribute_name]}[#method-i-5B-5D]:: Returns an attribute value.
|
|
+ # #add_attribute:: Adds a new attribute.
|
|
+ # #add_attributes:: Adds multiple new attributes.
|
|
+ # #attribute:: Returns the attribute value for a given name and optional namespace.
|
|
+ # #delete_attribute:: Removes an attribute.
|
|
+ #
|
|
+ # === Methods for Children
|
|
+ #
|
|
+ # {[index]}[#method-i-5B-5D]:: Returns the child at the given offset.
|
|
+ # #add_element:: Adds an element as the last child.
|
|
+ # #delete_element:: Deletes a child element.
|
|
+ # #each_element:: Calls the given block with each child element.
|
|
+ # #each_element_with_attribute:: Calls the given block with each child element
|
|
+ # that meets given criteria,
|
|
+ # which can include the attribute name.
|
|
+ # #each_element_with_text:: Calls the given block with each child element
|
|
+ # that meets given criteria,
|
|
+ # which can include text.
|
|
+ # #get_elements:: Returns an array of element children that match a given xpath.
|
|
+ #
|
|
+ # === Methods for \Text Children
|
|
+ #
|
|
+ # #add_text:: Adds a text node to the element.
|
|
+ # #get_text:: Returns a text node that meets specified criteria.
|
|
+ # #text:: Returns the text string from the first node that meets specified criteria.
|
|
+ # #texts:: Returns an array of the text children of the element.
|
|
+ # #text=:: Adds, removes, or replaces the first text child of the element
|
|
+ #
|
|
+ # === Methods for Other Children
|
|
+ #
|
|
+ # #cdatas:: Returns an array of the cdata children of the element.
|
|
+ # #comments:: Returns an array of the comment children of the element.
|
|
+ # #instructions:: Returns an array of the instruction children of the element.
|
|
+ #
|
|
+ # === Methods for Namespaces
|
|
+ #
|
|
+ # #add_namespace:: Adds a namespace to the element.
|
|
+ # #delete_namespace:: Removes a namespace from the element.
|
|
+ # #namespace:: Returns the string namespace URI for the element.
|
|
+ # #namespaces:: Returns a hash of all defined namespaces in the element.
|
|
+ # #prefixes:: Returns an array of the string prefixes (names)
|
|
+ # of all defined namespaces in the element
|
|
+ #
|
|
+ # === Methods for Querying
|
|
+ #
|
|
+ # #document:: Returns the document, if any, that the element belongs to.
|
|
+ # #root:: Returns the most distant element (not document) ancestor of the element.
|
|
+ # #root_node:: Returns the most distant ancestor of the element.
|
|
+ # #xpath:: Returns the string xpath to the element
|
|
+ # relative to the most distant parent
|
|
+ # #has_attributes?:: Returns whether the element has attributes.
|
|
+ # #has_elements?:: Returns whether the element has elements.
|
|
+ # #has_text?:: Returns whether the element has text.
|
|
+ # #next_element:: Returns the next sibling that is an element.
|
|
+ # #previous_element:: Returns the previous sibling that is an element.
|
|
+ # #raw:: Returns whether raw mode is set for the element.
|
|
+ # #whitespace:: Returns whether whitespace is respected for the element.
|
|
+ # #ignore_whitespace_nodes:: Returns whether whitespace nodes
|
|
+ # are to be ignored for the element.
|
|
+ # #node_type:: Returns symbol <tt>:element</tt>.
|
|
+ #
|
|
+ # === One More Method
|
|
+ #
|
|
+ # #inspect:: Returns a string representation of the element.
|
|
+ #
|
|
+ # === Accessors
|
|
+ #
|
|
+ # #elements:: Returns the REXML::Elements object for the element.
|
|
+ # #attributes:: Returns the REXML::Attributes object for the element.
|
|
+ # #context:: Returns or sets the context hash for the element.
|
|
+ #
|
|
class Element < Parent
|
|
include Namespace
|
|
|
|
@@ -30,32 +280,42 @@ module REXML
|
|
# whitespace handling.
|
|
attr_accessor :context
|
|
|
|
- # Constructor
|
|
- # arg::
|
|
- # if not supplied, will be set to the default value.
|
|
- # If a String, the name of this object will be set to the argument.
|
|
- # If an Element, the object will be shallowly cloned; name,
|
|
- # attributes, and namespaces will be copied. Children will +not+ be
|
|
- # copied.
|
|
- # parent::
|
|
- # if supplied, must be a Parent, and will be used as
|
|
- # the parent of this object.
|
|
- # context::
|
|
- # If supplied, must be a hash containing context items. Context items
|
|
- # include:
|
|
- # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
|
|
- # strings being the names of the elements to respect
|
|
- # whitespace for. Defaults to :+all+.
|
|
- # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
|
|
- # strings being the names of the elements to ignore whitespace on.
|
|
- # Overrides :+respect_whitespace+.
|
|
- # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
|
|
- # of strings being the names of the elements in which to ignore
|
|
- # whitespace-only nodes. If this is set, Text nodes which contain only
|
|
- # whitespace will not be added to the document tree.
|
|
- # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
|
|
- # the elements to process in raw mode. In raw mode, special
|
|
- # characters in text is not converted to or from entities.
|
|
+ # :call-seq:
|
|
+ # Element.new(name = 'UNDEFINED', parent = nil, context = nil) -> new_element
|
|
+ # Element.new(element, parent = nil, context = nil) -> new_element
|
|
+ #
|
|
+ # Returns a new \REXML::Element object.
|
|
+ #
|
|
+ # When no arguments are given,
|
|
+ # returns an element with name <tt>'UNDEFINED'</tt>:
|
|
+ #
|
|
+ # e = REXML::Element.new # => <UNDEFINED/>
|
|
+ # e.class # => REXML::Element
|
|
+ # e.name # => "UNDEFINED"
|
|
+ #
|
|
+ # When only argument +name+ is given,
|
|
+ # returns an element of the given name:
|
|
+ #
|
|
+ # REXML::Element.new('foo') # => <foo/>
|
|
+ #
|
|
+ # When only argument +element+ is given, it must be an \REXML::Element object;
|
|
+ # returns a shallow copy of the given element:
|
|
+ #
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e1 = REXML::Element.new(e0) # => <foo/>
|
|
+ #
|
|
+ # When argument +parent+ is also given, it must be an REXML::Parent object:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo', REXML::Parent.new)
|
|
+ # e.parent # => #<REXML::Parent @parent=nil, @children=[<foo/>]>
|
|
+ #
|
|
+ # When argument +context+ is also given, it must be a hash
|
|
+ # representing the context for the element;
|
|
+ # see {Element Context}[../doc/rexml/context_rdoc.html]:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo', nil, {raw: :all})
|
|
+ # e.context # => {:raw=>:all}
|
|
+ #
|
|
def initialize( arg = UNDEFINED, parent=nil, context=nil )
|
|
super(parent)
|
|
|
|
@@ -74,6 +334,27 @@ module REXML
|
|
end
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # inspect -> string
|
|
+ #
|
|
+ # Returns a string representation of the element.
|
|
+ #
|
|
+ # For an element with no attributes and no children, shows the element name:
|
|
+ #
|
|
+ # REXML::Element.new.inspect # => "<UNDEFINED/>"
|
|
+ #
|
|
+ # Shows attributes, if any:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.add_attributes({'bar' => 0, 'baz' => 1})
|
|
+ # e.inspect # => "<foo bar='0' baz='1'/>"
|
|
+ #
|
|
+ # Shows an ellipsis (<tt>...</tt>), if there are child elements:
|
|
+ #
|
|
+ # e.add_element(REXML::Element.new('bar'))
|
|
+ # e.add_element(REXML::Element.new('baz'))
|
|
+ # e.inspect # => "<foo bar='0' baz='1'> ... </>"
|
|
+ #
|
|
def inspect
|
|
rv = "<#@expanded_name"
|
|
|
|
@@ -89,60 +370,118 @@ module REXML
|
|
end
|
|
end
|
|
|
|
-
|
|
- # Creates a shallow copy of self.
|
|
- # d = Document.new "<a><b/><b/><c><d/></c></a>"
|
|
- # new_a = d.root.clone
|
|
- # puts new_a # => "<a/>"
|
|
+ # :call-seq:
|
|
+ # clone -> new_element
|
|
+ #
|
|
+ # Returns a shallow copy of the element, containing the name and attributes,
|
|
+ # but not the parent or children:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.add_attributes({'bar' => 0, 'baz' => 1})
|
|
+ # e.clone # => <foo bar='0' baz='1'/>
|
|
+ #
|
|
def clone
|
|
self.class.new self
|
|
end
|
|
|
|
- # Evaluates to the root node of the document that this element
|
|
- # belongs to. If this element doesn't belong to a document, but does
|
|
- # belong to another Element, the parent's root will be returned, until the
|
|
- # earliest ancestor is found.
|
|
- #
|
|
- # Note that this is not the same as the document element.
|
|
- # In the following example, <a> is the document element, and the root
|
|
- # node is the parent node of the document element. You may ask yourself
|
|
- # why the root node is useful: consider the doctype and XML declaration,
|
|
- # and any processing instructions before the document element... they
|
|
- # are children of the root node, or siblings of the document element.
|
|
- # The only time this isn't true is when an Element is created that is
|
|
- # not part of any Document. In this case, the ancestor that has no
|
|
- # parent acts as the root node.
|
|
- # d = Document.new '<a><b><c/></b></a>'
|
|
- # a = d[1] ; c = a[1][1]
|
|
- # d.root_node == d # TRUE
|
|
- # a.root_node # namely, d
|
|
- # c.root_node # again, d
|
|
+ # :call-seq:
|
|
+ # root_node -> document or element
|
|
+ #
|
|
+ # Returns the most distant ancestor of +self+.
|
|
+ #
|
|
+ # When the element is part of a document,
|
|
+ # returns the root node of the document.
|
|
+ # Note that the root node is different from the document element;
|
|
+ # in this example +a+ is the document element and the root node is its parent:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a><b><c/></b></a>')
|
|
+ # top_element = d.first # => <a> ... </>
|
|
+ # child = top_element.first # => <b> ... </>
|
|
+ # d.root_node == d # => true
|
|
+ # top_element.root_node == d # => true
|
|
+ # child.root_node == d # => true
|
|
+ #
|
|
+ # When the element is not part of a document, but does have ancestor elements,
|
|
+ # returns the most distant ancestor element:
|
|
+ #
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e1 = REXML::Element.new('bar')
|
|
+ # e1.parent = e0
|
|
+ # e2 = REXML::Element.new('baz')
|
|
+ # e2.parent = e1
|
|
+ # e2.root_node == e0 # => true
|
|
+ #
|
|
+ # When the element has no ancestor elements,
|
|
+ # returns +self+:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.root_node == e # => true
|
|
+ #
|
|
+ # Related: #root, #document.
|
|
+ #
|
|
def root_node
|
|
parent.nil? ? self : parent.root_node
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # root -> element
|
|
+ #
|
|
+ # Returns the most distant _element_ (not document) ancestor of the element:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a><b><c/></b></a>')
|
|
+ # top_element = d.first
|
|
+ # child = top_element.first
|
|
+ # top_element.root == top_element # => true
|
|
+ # child.root == top_element # => true
|
|
+ #
|
|
+ # For a document, returns the topmost element:
|
|
+ #
|
|
+ # d.root == top_element # => true
|
|
+ #
|
|
+ # Related: #root_node, #document.
|
|
+ #
|
|
def root
|
|
return elements[1] if self.kind_of? Document
|
|
return self if parent.kind_of? Document or parent.nil?
|
|
return parent.root
|
|
end
|
|
|
|
- # Evaluates to the document to which this element belongs, or nil if this
|
|
- # element doesn't belong to a document.
|
|
+ # :call-seq:
|
|
+ # document -> document or nil
|
|
+ #
|
|
+ # If the element is part of a document, returns that document:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a><b><c/></b></a>')
|
|
+ # top_element = d.first
|
|
+ # child = top_element.first
|
|
+ # top_element.document == d # => true
|
|
+ # child.document == d # => true
|
|
+ #
|
|
+ # If the element is not part of a document, returns +nil+:
|
|
+ #
|
|
+ # REXML::Element.new.document # => nil
|
|
+ #
|
|
+ # For a document, returns +self+:
|
|
+ #
|
|
+ # d.document == d # => true
|
|
+ #
|
|
+ # Related: #root, #root_node.
|
|
+ #
|
|
def document
|
|
rt = root
|
|
rt.parent if rt
|
|
end
|
|
|
|
- # Evaluates to +true+ if whitespace is respected for this element. This
|
|
- # is the case if:
|
|
- # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
|
|
- # 2. The context has :+respect_whitespace+ set to :+all+ or
|
|
- # an array containing the name of this element, and
|
|
- # :+compress_whitespace+ isn't set to :+all+ or an array containing the
|
|
- # name of this element.
|
|
- # The evaluation is tested against +expanded_name+, and so is namespace
|
|
- # sensitive.
|
|
+ # :call-seq:
|
|
+ # whitespace
|
|
+ #
|
|
+ # Returns +true+ if whitespace is respected for this element,
|
|
+ # +false+ otherwise.
|
|
+ #
|
|
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
|
|
+ #
|
|
+ # The evaluation is tested against the element's +expanded_name+,
|
|
+ # and so is namespace-sensitive.
|
|
def whitespace
|
|
@whitespace = nil
|
|
if @context
|
|
@@ -159,6 +498,13 @@ module REXML
|
|
@whitespace
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # ignore_whitespace_nodes
|
|
+ #
|
|
+ # Returns +true+ if whitespace nodes are ignored for the element.
|
|
+ #
|
|
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
|
|
+ #
|
|
def ignore_whitespace_nodes
|
|
@ignore_whitespace_nodes = false
|
|
if @context
|
|
@@ -170,9 +516,12 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Evaluates to +true+ if raw mode is set for this element. This
|
|
- # is the case if the context has :+raw+ set to :+all+ or
|
|
- # an array containing the name of this element.
|
|
+ # :call-seq:
|
|
+ # raw
|
|
+ #
|
|
+ # Returns +true+ if raw mode is set for the element.
|
|
+ #
|
|
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
|
|
#
|
|
# The evaluation is tested against +expanded_name+, and so is namespace
|
|
# sensitive.
|
|
@@ -180,7 +529,7 @@ module REXML
|
|
@raw = (@context and @context[:raw] and
|
|
(@context[:raw] == :all or
|
|
@context[:raw].include? expanded_name))
|
|
- @raw
|
|
+ @raw
|
|
end
|
|
|
|
#once :whitespace, :raw, :ignore_whitespace_nodes
|
|
@@ -189,10 +538,25 @@ module REXML
|
|
# Namespaces #
|
|
#################################################
|
|
|
|
- # Evaluates to an +Array+ containing the prefixes (names) of all defined
|
|
- # namespaces at this context node.
|
|
- # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
|
|
- # doc.elements['//b'].prefixes # -> ['x', 'y']
|
|
+ # :call-seq:
|
|
+ # prefixes -> array_of_namespace_prefixes
|
|
+ #
|
|
+ # Returns an array of the string prefixes (names) of all defined namespaces
|
|
+ # in the element and its ancestors:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <a xmlns:x='1' xmlns:y='2'>
|
|
+ # <b/>
|
|
+ # <c xmlns:z='3'/>
|
|
+ # </a>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string, {compress_whitespace: :all})
|
|
+ # d.elements['//a'].prefixes # => ["x", "y"]
|
|
+ # d.elements['//b'].prefixes # => ["x", "y"]
|
|
+ # d.elements['//c'].prefixes # => ["x", "y", "z"]
|
|
+ #
|
|
def prefixes
|
|
prefixes = []
|
|
prefixes = parent.prefixes if parent
|
|
@@ -200,6 +564,25 @@ module REXML
|
|
return prefixes
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # namespaces -> hash_of_namespaces
|
|
+ #
|
|
+ # Returns a hash of all defined namespaces
|
|
+ # in the element and its ancestors:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <a xmlns:x='1' xmlns:y='2'>
|
|
+ # <b/>
|
|
+ # <c xmlns:z='3'/>
|
|
+ # </a>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.elements['//a'].namespaces # => {"x"=>"1", "y"=>"2"}
|
|
+ # d.elements['//b'].namespaces # => {"x"=>"1", "y"=>"2"}
|
|
+ # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
|
|
+ #
|
|
def namespaces
|
|
namespaces = {}
|
|
namespaces = parent.namespaces if parent
|
|
@@ -207,19 +590,26 @@ module REXML
|
|
return namespaces
|
|
end
|
|
|
|
- # Evaluates to the URI for a prefix, or the empty string if no such
|
|
- # namespace is declared for this element. Evaluates recursively for
|
|
- # ancestors. Returns the default namespace, if there is one.
|
|
- # prefix::
|
|
- # the prefix to search for. If not supplied, returns the default
|
|
- # namespace if one exists
|
|
- # Returns::
|
|
- # the namespace URI as a String, or nil if no such namespace
|
|
- # exists. If the namespace is undefined, returns an empty string
|
|
- # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
|
|
- # b = doc.elements['//b']
|
|
- # b.namespace # -> '1'
|
|
- # b.namespace("y") # -> '2'
|
|
+ # :call-seq:
|
|
+ # namespace(prefix = nil) -> string_uri or nil
|
|
+ #
|
|
+ # Returns the string namespace URI for the element,
|
|
+ # possibly deriving from one of its ancestors.
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <a xmlns='1' xmlns:y='2'>
|
|
+ # <b/>
|
|
+ # <c xmlns:z='3'/>
|
|
+ # </a>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # b = d.elements['//b']
|
|
+ # b.namespace # => "1"
|
|
+ # b.namespace('y') # => "2"
|
|
+ # b.namespace('nosuch') # => nil
|
|
+ #
|
|
def namespace(prefix=nil)
|
|
if prefix.nil?
|
|
prefix = prefix()
|
|
@@ -235,19 +625,24 @@ module REXML
|
|
return ns
|
|
end
|
|
|
|
- # Adds a namespace to this element.
|
|
- # prefix::
|
|
- # the prefix string, or the namespace URI if +uri+ is not
|
|
- # supplied
|
|
- # uri::
|
|
- # the namespace URI. May be nil, in which +prefix+ is used as
|
|
- # the URI
|
|
- # Evaluates to: this Element
|
|
- # a = Element.new("a")
|
|
- # a.add_namespace("xmlns:foo", "bar" )
|
|
- # a.add_namespace("foo", "bar") # shorthand for previous line
|
|
- # a.add_namespace("twiddle")
|
|
- # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
|
|
+ # :call-seq:
|
|
+ # add_namespace(prefix, uri = nil) -> self
|
|
+ #
|
|
+ # Adds a namespace to the element; returns +self+.
|
|
+ #
|
|
+ # With the single argument +prefix+,
|
|
+ # adds a default namespace (<tt>xmlns</tt>) using the given +prefix+ as the namespace URI:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.add_namespace('bar')
|
|
+ # e.namespaces # => {"xmlns"=>"bar"}
|
|
+ #
|
|
+ # With both arguments +prefix+ and +uri+ given,
|
|
+ # adds a namespace using both arguments:
|
|
+ #
|
|
+ # e.add_namespace('baz', 'bat')
|
|
+ # e.namespaces # => {"xmlns"=>"bar", "baz"=>"bat"}
|
|
+ #
|
|
def add_namespace( prefix, uri=nil )
|
|
unless uri
|
|
@attributes["xmlns"] = prefix
|
|
@@ -258,16 +653,28 @@ module REXML
|
|
self
|
|
end
|
|
|
|
- # Removes a namespace from this node. This only works if the namespace is
|
|
- # actually declared in this node. If no argument is passed, deletes the
|
|
- # default namespace.
|
|
+ # :call-seq:
|
|
+ # delete_namespace(namespace = 'xmlns') -> self
|
|
+ #
|
|
+ # Removes a namespace from the element.
|
|
+ #
|
|
+ # With no argument, removes the default namespace:
|
|
+ #
|
|
+ # d = REXML::Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
|
|
+ # d.to_s # => "<a xmlns:foo='bar' xmlns='twiddle'/>"
|
|
+ # d.root.delete_namespace # => <a xmlns:foo='bar'/>
|
|
+ # d.to_s # => "<a xmlns:foo='bar'/>"
|
|
+ #
|
|
+ # With argument +namespace+, removes the specified namespace:
|
|
+ #
|
|
+ # d.root.delete_namespace('foo')
|
|
+ # d.to_s # => "<a/>"
|
|
+ #
|
|
+ # Does nothing if no such namespace is found:
|
|
+ #
|
|
+ # d.root.delete_namespace('nosuch')
|
|
+ # d.to_s # => "<a/>"
|
|
#
|
|
- # Evaluates to: this element
|
|
- # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
|
|
- # doc.root.delete_namespace
|
|
- # puts doc # -> <a xmlns:foo='bar'/>
|
|
- # doc.root.delete_namespace 'foo'
|
|
- # puts doc # -> <a/>
|
|
def delete_namespace namespace="xmlns"
|
|
namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
|
|
attribute = attributes.get_attribute(namespace)
|
|
@@ -279,20 +686,40 @@ module REXML
|
|
# Elements #
|
|
#################################################
|
|
|
|
- # Adds a child to this element, optionally setting attributes in
|
|
- # the element.
|
|
- # element::
|
|
- # optional. If Element, the element is added.
|
|
- # Otherwise, a new Element is constructed with the argument (see
|
|
- # Element.initialize).
|
|
- # attrs::
|
|
- # If supplied, must be a Hash containing String name,value
|
|
- # pairs, which will be used to set the attributes of the new Element.
|
|
- # Returns:: the Element that was added
|
|
- # el = doc.add_element 'my-tag'
|
|
- # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
|
|
- # el = Element.new 'my-tag'
|
|
- # doc.add_element el
|
|
+ # :call-seq:
|
|
+ # add_element(name, attributes = nil) -> new_element
|
|
+ # add_element(element, attributes = nil) -> element
|
|
+ #
|
|
+ # Adds a child element, optionally setting attributes
|
|
+ # on the added element; returns the added element.
|
|
+ #
|
|
+ # With string argument +name+, creates a new element with that name
|
|
+ # and adds the new element as a child:
|
|
+ #
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e0.add_element('bar')
|
|
+ # e0[0] # => <bar/>
|
|
+ #
|
|
+ #
|
|
+ # With argument +name+ and hash argument +attributes+,
|
|
+ # sets attributes on the new element:
|
|
+ #
|
|
+ # e0.add_element('baz', {'bat' => '0', 'bam' => '1'})
|
|
+ # e0[1] # => <baz bat='0' bam='1'/>
|
|
+ #
|
|
+ # With element argument +element+, adds that element as a child:
|
|
+ #
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e1 = REXML::Element.new('bar')
|
|
+ # e0.add_element(e1)
|
|
+ # e0[0] # => <bar/>
|
|
+ #
|
|
+ # With argument +element+ and hash argument +attributes+,
|
|
+ # sets attributes on the added element:
|
|
+ #
|
|
+ # e0.add_element(e1, {'bat' => '0', 'bam' => '1'})
|
|
+ # e0[1] # => <bar bat='0' bam='1'/>
|
|
+ #
|
|
def add_element element, attrs=nil
|
|
raise "First argument must be either an element name, or an Element object" if element.nil?
|
|
el = @elements.add(element)
|
|
@@ -302,52 +729,112 @@ module REXML
|
|
el
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # delete_element(index) -> removed_element or nil
|
|
+ # delete_element(element) -> removed_element or nil
|
|
+ # delete_element(xpath) -> removed_element or nil
|
|
+ #
|
|
# Deletes a child element.
|
|
- # element::
|
|
- # Must be an +Element+, +String+, or +Integer+. If Element,
|
|
- # the element is removed. If String, the element is found (via XPath)
|
|
- # and removed. <em>This means that any parent can remove any
|
|
- # descendant.<em> If Integer, the Element indexed by that number will be
|
|
- # removed.
|
|
- # Returns:: the element that was removed.
|
|
- # doc.delete_element "/a/b/c[@id='4']"
|
|
- # doc.delete_element doc.elements["//k"]
|
|
- # doc.delete_element 1
|
|
+ #
|
|
+ # When 1-based integer argument +index+ is given,
|
|
+ # removes and returns the child element at that offset if it exists;
|
|
+ # indexing does not include text nodes;
|
|
+ # returns +nil+ if the element does not exist:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # a = d.root # => <a> ... </>
|
|
+ # a.delete_element(1) # => <b/>
|
|
+ # a.delete_element(1) # => <c/>
|
|
+ # a.delete_element(1) # => nil
|
|
+ #
|
|
+ # When element argument +element+ is given,
|
|
+ # removes and returns that child element if it exists,
|
|
+ # otherwise returns +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # a = d.root # => <a> ... </>
|
|
+ # c = a[2] # => <c/>
|
|
+ # a.delete_element(c) # => <c/>
|
|
+ # a.delete_element(c) # => nil
|
|
+ #
|
|
+ # When xpath argument +xpath+ is given,
|
|
+ # removes and returns the element at xpath if it exists,
|
|
+ # otherwise returns +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # a = d.root # => <a> ... </>
|
|
+ # a.delete_element('//c') # => <c/>
|
|
+ # a.delete_element('//c') # => nil
|
|
+ #
|
|
def delete_element element
|
|
@elements.delete element
|
|
end
|
|
|
|
- # Evaluates to +true+ if this element has at least one child Element
|
|
- # doc = Document.new "<a><b/><c>Text</c></a>"
|
|
- # doc.root.has_elements # -> true
|
|
- # doc.elements["/a/b"].has_elements # -> false
|
|
- # doc.elements["/a/c"].has_elements # -> false
|
|
+ # :call-seq:
|
|
+ # has_elements?
|
|
+ #
|
|
+ # Returns +true+ if the element has one or more element children,
|
|
+ # +false+ otherwise:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # a = d.root # => <a> ... </>
|
|
+ # a.has_elements? # => true
|
|
+ # b = a[0] # => <b/>
|
|
+ # b.has_elements? # => false
|
|
+ #
|
|
def has_elements?
|
|
!@elements.empty?
|
|
end
|
|
|
|
- # Iterates through the child elements, yielding for each Element that
|
|
- # has a particular attribute set.
|
|
- # key::
|
|
- # the name of the attribute to search for
|
|
- # value::
|
|
- # the value of the attribute
|
|
- # max::
|
|
- # (optional) causes this method to return after yielding
|
|
- # for this number of matching children
|
|
- # name::
|
|
- # (optional) if supplied, this is an XPath that filters
|
|
- # the children to check.
|
|
- #
|
|
- # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
|
|
- # # Yields b, c, d
|
|
- # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
|
|
- # # Yields b, d
|
|
- # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
|
|
- # # Yields b
|
|
- # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
|
|
- # # Yields d
|
|
- # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
|
|
+ # :call-seq:
|
|
+ # each_element_with_attribute(attr_name, value = nil, max = 0, xpath = nil) {|e| ... }
|
|
+ #
|
|
+ # Calls the given block with each child element that meets given criteria.
|
|
+ #
|
|
+ # When only string argument +attr_name+ is given,
|
|
+ # calls the block with each child element that has that attribute:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b id="1"/><c id="2"/><d id="1"/><e/></a>'
|
|
+ # a = d.root
|
|
+ # a.each_element_with_attribute('id') {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b id='1'/>
|
|
+ # <c id='2'/>
|
|
+ # <d id='1'/>
|
|
+ #
|
|
+ # With argument +attr_name+ and string argument +value+ given,
|
|
+ # calls the block with each child element that has that attribute
|
|
+ # with that value:
|
|
+ #
|
|
+ # a.each_element_with_attribute('id', '1') {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b id='1'/>
|
|
+ # <d id='1'/>
|
|
+ #
|
|
+ # With arguments +attr_name+, +value+, and integer argument +max+ given,
|
|
+ # calls the block with at most +max+ child elements:
|
|
+ #
|
|
+ # a.each_element_with_attribute('id', '1', 1) {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b id='1'/>
|
|
+ #
|
|
+ # With all arguments given, including +xpath+,
|
|
+ # calls the block with only those child elements
|
|
+ # that meet the first three criteria,
|
|
+ # and also match the given +xpath+:
|
|
+ #
|
|
+ # a.each_element_with_attribute('id', '1', 2, '//d') {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <d id='1'/>
|
|
+ #
|
|
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
|
|
each_with_something( proc {|child|
|
|
if value.nil?
|
|
@@ -358,27 +845,53 @@ module REXML
|
|
}, max, name, &block )
|
|
end
|
|
|
|
- # Iterates through the children, yielding for each Element that
|
|
- # has a particular text set.
|
|
- # text::
|
|
- # the text to search for. If nil, or not supplied, will iterate
|
|
- # over all +Element+ children that contain at least one +Text+ node.
|
|
- # max::
|
|
- # (optional) causes this method to return after yielding
|
|
- # for this number of matching children
|
|
- # name::
|
|
- # (optional) if supplied, this is an XPath that filters
|
|
- # the children to check.
|
|
- #
|
|
- # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
|
|
- # # Yields b, c, d
|
|
- # doc.each_element_with_text {|e|p e}
|
|
- # # Yields b, c
|
|
- # doc.each_element_with_text('b'){|e|p e}
|
|
- # # Yields b
|
|
- # doc.each_element_with_text('b', 1){|e|p e}
|
|
- # # Yields d
|
|
- # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
|
|
+ # :call-seq:
|
|
+ # each_element_with_text(text = nil, max = 0, xpath = nil) {|e| ... }
|
|
+ #
|
|
+ # Calls the given block with each child element that meets given criteria.
|
|
+ #
|
|
+ # With no arguments, calls the block with each child element that has text:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
|
|
+ # a = d.root
|
|
+ # a.each_element_with_text {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b> ... </>
|
|
+ # <c> ... </>
|
|
+ # <d> ... </>
|
|
+ #
|
|
+ # With the single string argument +text+,
|
|
+ # calls the block with each element that has exactly that text:
|
|
+ #
|
|
+ # a.each_element_with_text('b') {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b> ... </>
|
|
+ # <c> ... </>
|
|
+ #
|
|
+ # With argument +text+ and integer argument +max+,
|
|
+ # calls the block with at most +max+ elements:
|
|
+ #
|
|
+ # a.each_element_with_text('b', 1) {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b> ... </>
|
|
+ #
|
|
+ # With all arguments given, including +xpath+,
|
|
+ # calls the block with only those child elements
|
|
+ # that meet the first two criteria,
|
|
+ # and also match the given +xpath+:
|
|
+ #
|
|
+ # a.each_element_with_text('b', 2, '//c') {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <c> ... </>
|
|
+ #
|
|
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
|
|
each_with_something( proc {|child|
|
|
if text.nil?
|
|
@@ -389,35 +902,71 @@ module REXML
|
|
}, max, name, &block )
|
|
end
|
|
|
|
- # Synonym for Element.elements.each
|
|
+ # :call-seq:
|
|
+ # each_element {|e| ... }
|
|
+ #
|
|
+ # Calls the given block with each child element:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
|
|
+ # a = d.root
|
|
+ # a.each_element {|e| p e }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <b> ... </>
|
|
+ # <c> ... </>
|
|
+ # <d> ... </>
|
|
+ # <e/>
|
|
+ #
|
|
def each_element( xpath=nil, &block ) # :yields: Element
|
|
@elements.each( xpath, &block )
|
|
end
|
|
|
|
- # Synonym for Element.to_a
|
|
- # This is a little slower than calling elements.each directly.
|
|
- # xpath:: any XPath by which to search for elements in the tree
|
|
- # Returns:: an array of Elements that match the supplied path
|
|
+ # :call-seq:
|
|
+ # get_elements(xpath)
|
|
+ #
|
|
+ # Returns an array of the elements that match the given +xpath+:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <a level='1'>
|
|
+ # <a level='2'/>
|
|
+ # </a>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.root.get_elements('//a') # => [<a level='1'> ... </>, <a level='2'/>]
|
|
+ #
|
|
def get_elements( xpath )
|
|
@elements.to_a( xpath )
|
|
end
|
|
|
|
- # Returns the next sibling that is an element, or nil if there is
|
|
- # no Element sibling after this one
|
|
- # doc = Document.new '<a><b/>text<c/></a>'
|
|
- # doc.root.elements['b'].next_element #-> <c/>
|
|
- # doc.root.elements['c'].next_element #-> nil
|
|
+ # :call-seq:
|
|
+ # next_element
|
|
+ #
|
|
+ # Returns the next sibling that is an element if it exists,
|
|
+ # +nil+ otherwise:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # d.root.elements['b'].next_element #-> <c/>
|
|
+ # d.root.elements['c'].next_element #-> nil
|
|
+ #
|
|
def next_element
|
|
element = next_sibling
|
|
element = element.next_sibling until element.nil? or element.kind_of? Element
|
|
return element
|
|
end
|
|
|
|
- # Returns the previous sibling that is an element, or nil if there is
|
|
- # no Element sibling prior to this one
|
|
- # doc = Document.new '<a><b/>text<c/></a>'
|
|
- # doc.root.elements['c'].previous_element #-> <b/>
|
|
- # doc.root.elements['b'].previous_element #-> nil
|
|
+ # :call-seq:
|
|
+ # previous_element
|
|
+ #
|
|
+ # Returns the previous sibling that is an element if it exists,
|
|
+ # +nil+ otherwise:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # d.root.elements['c'].previous_element #-> <b/>
|
|
+ # d.root.elements['b'].previous_element #-> nil
|
|
+ #
|
|
def previous_element
|
|
element = previous_sibling
|
|
element = element.previous_sibling until element.nil? or element.kind_of? Element
|
|
@@ -429,36 +978,69 @@ module REXML
|
|
# Text #
|
|
#################################################
|
|
|
|
- # Evaluates to +true+ if this element has at least one Text child
|
|
+ # :call-seq:
|
|
+ # has_text? -> true or false
|
|
+ #
|
|
+ # Returns +true+ if the element has one or more text nodes,
|
|
+ # +false+ otherwise:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
|
|
+ # a = d.root
|
|
+ # a.has_text? # => true
|
|
+ # b = a[0]
|
|
+ # b.has_text? # => false
|
|
+ #
|
|
def has_text?
|
|
not text().nil?
|
|
end
|
|
|
|
- # A convenience method which returns the String value of the _first_
|
|
- # child text element, if one exists, and +nil+ otherwise.
|
|
+ # :call-seq:
|
|
+ # text(xpath = nil) -> text_string or nil
|
|
+ #
|
|
+ # Returns the text string from the first text node child
|
|
+ # in a specified element, if it exists, +nil+ otherwise.
|
|
#
|
|
- # <em>Note that an element may have multiple Text elements, perhaps
|
|
- # separated by other children</em>. Be aware that this method only returns
|
|
- # the first Text node.
|
|
+ # With no argument, returns the text from the first text node in +self+:
|
|
#
|
|
- # This method returns the +value+ of the first text child node, which
|
|
- # ignores the +raw+ setting, so always returns normalized text. See
|
|
- # the Text::value documentation.
|
|
+ # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
+ # d.root.text.class # => String
|
|
+ # d.root.text # => "some text "
|
|
+ #
|
|
+ # With argument +xpath+, returns text from the first text node
|
|
+ # in the element that matches +xpath+:
|
|
+ #
|
|
+ # d.root.text(1) # => "this is bold!"
|
|
+ #
|
|
+ # Note that an element may have multiple text nodes,
|
|
+ # possibly separated by other non-text children, as above.
|
|
+ # Even so, the returned value is the string text from the first such node.
|
|
+ #
|
|
+ # Note also that the text node is retrieved by method get_text,
|
|
+ # and so is always normalized text.
|
|
#
|
|
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
- # # The element 'p' has two text elements, "some text " and " more text".
|
|
- # doc.root.text #-> "some text "
|
|
def text( path = nil )
|
|
rv = get_text(path)
|
|
return rv.value unless rv.nil?
|
|
nil
|
|
end
|
|
|
|
- # Returns the first child Text node, if any, or +nil+ otherwise.
|
|
- # This method returns the actual +Text+ node, rather than the String content.
|
|
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
- # # The element 'p' has two text elements, "some text " and " more text".
|
|
- # doc.root.get_text.value #-> "some text "
|
|
+ # :call-seq:
|
|
+ # get_text(xpath = nil) -> text_node or nil
|
|
+ #
|
|
+ # Returns the first text node child in a specified element, if it exists,
|
|
+ # +nil+ otherwise.
|
|
+ #
|
|
+ # With no argument, returns the first text node from +self+:
|
|
+ #
|
|
+ # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
+ # d.root.get_text.class # => REXML::Text
|
|
+ # d.root.get_text # => "some text "
|
|
+ #
|
|
+ # With argument +xpath+, returns the first text node from the element
|
|
+ # that matches +xpath+:
|
|
+ #
|
|
+ # d.root.get_text(1) # => "this is bold!"
|
|
+ #
|
|
def get_text path = nil
|
|
rv = nil
|
|
if path
|
|
@@ -470,26 +1052,31 @@ module REXML
|
|
return rv
|
|
end
|
|
|
|
- # Sets the first Text child of this object. See text() for a
|
|
- # discussion about Text children.
|
|
- #
|
|
- # If a Text child already exists, the child is replaced by this
|
|
- # content. This means that Text content can be deleted by calling
|
|
- # this method with a nil argument. In this case, the next Text
|
|
- # child becomes the first Text child. In no case is the order of
|
|
- # any siblings disturbed.
|
|
- # text::
|
|
- # If a String, a new Text child is created and added to
|
|
- # this Element as the first Text child. If Text, the text is set
|
|
- # as the first Child element. If nil, then any existing first Text
|
|
- # child is removed.
|
|
- # Returns:: this Element.
|
|
- # doc = Document.new '<a><b/></a>'
|
|
- # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
|
|
- # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
|
|
- # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
|
|
- # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
|
|
- # doc.root.text = nil #-> '<a><b/><c/></a>'
|
|
+ # :call-seq:
|
|
+ # text = string -> string
|
|
+ # text = nil -> nil
|
|
+ #
|
|
+ # Adds, replaces, or removes the first text node child in the element.
|
|
+ #
|
|
+ # With string argument +string+,
|
|
+ # creates a new \REXML::Text node containing that string,
|
|
+ # honoring the current settings for whitespace and raw,
|
|
+ # then places the node as the first text child in the element;
|
|
+ # returns +string+.
|
|
+ #
|
|
+ # If the element has no text child, the text node is added:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a><b/></a>'
|
|
+ # d.root.text = 'foo' #-> '<a><b/>foo</a>'
|
|
+ #
|
|
+ # If the element has a text child, it is replaced:
|
|
+ #
|
|
+ # d.root.text = 'bar' #-> '<a><b/>bar</a>'
|
|
+ #
|
|
+ # With argument +nil+, removes the first text child:
|
|
+ #
|
|
+ # d.root.text = nil #-> '<a><b/></a>'
|
|
+ #
|
|
def text=( text )
|
|
if text.kind_of? String
|
|
text = Text.new( text, whitespace(), nil, raw() )
|
|
@@ -509,17 +1096,45 @@ module REXML
|
|
return self
|
|
end
|
|
|
|
- # A helper method to add a Text child. Actual Text instances can
|
|
- # be added with regular Parent methods, such as add() and <<()
|
|
- # text::
|
|
- # if a String, a new Text instance is created and added
|
|
- # to the parent. If Text, the object is added directly.
|
|
- # Returns:: this Element
|
|
- # e = Element.new('a') #-> <e/>
|
|
- # e.add_text 'foo' #-> <e>foo</e>
|
|
- # e.add_text Text.new(' bar') #-> <e>foo bar</e>
|
|
- # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
|
|
- # element and <b>2</b> Text node children.
|
|
+ # :call-seq:
|
|
+ # add_text(string) -> nil
|
|
+ # add_text(text_node) -> self
|
|
+ #
|
|
+ # Adds text to the element.
|
|
+ #
|
|
+ # When string argument +string+ is given, returns +nil+.
|
|
+ #
|
|
+ # If the element has no child text node,
|
|
+ # creates a \REXML::Text object using the string,
|
|
+ # honoring the current settings for whitespace and raw,
|
|
+ # then adds that node to the element:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a><b/></a>')
|
|
+ # a = d.root
|
|
+ # a.add_text('foo')
|
|
+ # a.to_a # => [<b/>, "foo"]
|
|
+ #
|
|
+ # If the element has child text nodes,
|
|
+ # appends the string to the _last_ text node:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a>foo<b/>bar</a>')
|
|
+ # a = d.root
|
|
+ # a.add_text('baz')
|
|
+ # a.to_a # => ["foo", <b/>, "barbaz"]
|
|
+ # a.add_text('baz')
|
|
+ # a.to_a # => ["foo", <b/>, "barbazbaz"]
|
|
+ #
|
|
+ # When text node argument +text_node+ is given,
|
|
+ # appends the node as the last text node in the element;
|
|
+ # returns +self+:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a>foo<b/>bar</a>')
|
|
+ # a = d.root
|
|
+ # a.add_text(REXML::Text.new('baz'))
|
|
+ # a.to_a # => ["foo", <b/>, "bar", "baz"]
|
|
+ # a.add_text(REXML::Text.new('baz'))
|
|
+ # a.to_a # => ["foo", <b/>, "bar", "baz", "baz"]
|
|
+ #
|
|
def add_text( text )
|
|
if text.kind_of? String
|
|
if @children[-1].kind_of? Text
|
|
@@ -532,10 +1147,39 @@ module REXML
|
|
return self
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # node_type -> :element
|
|
+ #
|
|
+ # Returns symbol <tt>:element</tt>:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a/>')
|
|
+ # a = d.root # => <a/>
|
|
+ # a.node_type # => :element
|
|
+ #
|
|
def node_type
|
|
:element
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # xpath -> string_xpath
|
|
+ #
|
|
+ # Returns the string xpath to the element
|
|
+ # relative to the most distant parent:
|
|
+ #
|
|
+ # d = REXML::Document.new('<a><b><c/></b></a>')
|
|
+ # a = d.root # => <a> ... </>
|
|
+ # b = a[0] # => <b> ... </>
|
|
+ # c = b[0] # => <c/>
|
|
+ # d.xpath # => ""
|
|
+ # a.xpath # => "/a"
|
|
+ # b.xpath # => "/a/b"
|
|
+ # c.xpath # => "/a/b/c"
|
|
+ #
|
|
+ # If there is no parent, returns the expanded name of the element:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.xpath # => "foo"
|
|
+ #
|
|
def xpath
|
|
path_elements = []
|
|
cur = self
|
|
@@ -551,19 +1195,45 @@ module REXML
|
|
# Attributes #
|
|
#################################################
|
|
|
|
- # Fetches an attribute value or a child.
|
|
+ # :call-seq:
|
|
+ # [index] -> object
|
|
+ # [attr_name] -> attr_value
|
|
+ # [attr_sym] -> attr_value
|
|
+ #
|
|
+ # With integer argument +index+ given,
|
|
+ # returns the child at offset +index+, or +nil+ if none:
|
|
+ #
|
|
+ # d = REXML::Document.new '<root><a/>text<b/>more<c/></root>'
|
|
+ # root = d.root
|
|
+ # (0..root.size).each do |index|
|
|
+ # node = root[index]
|
|
+ # p "#{index}: #{node} (#{node.class})"
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # "0: <a/> (REXML::Element)"
|
|
+ # "1: text (REXML::Text)"
|
|
+ # "2: <b/> (REXML::Element)"
|
|
+ # "3: more (REXML::Text)"
|
|
+ # "4: <c/> (REXML::Element)"
|
|
+ # "5: (NilClass)"
|
|
+ #
|
|
+ # With string argument +attr_name+ given,
|
|
+ # returns the string value for the given attribute name if it exists,
|
|
+ # otherwise +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new('<root attr="value"></root>')
|
|
+ # root = d.root
|
|
+ # root['attr'] # => "value"
|
|
+ # root['nosuch'] # => nil
|
|
#
|
|
- # If String or Symbol is specified, it's treated as attribute
|
|
- # name. Attribute value as String or +nil+ is returned. This case
|
|
- # is shortcut of +attributes[name]+.
|
|
+ # With symbol argument +attr_sym+ given,
|
|
+ # returns <tt>[attr_sym.to_s]</tt>:
|
|
#
|
|
- # If Integer is specified, it's treated as the index of
|
|
- # child. It returns Nth child.
|
|
+ # root[:attr] # => "value"
|
|
+ # root[:nosuch] # => nil
|
|
#
|
|
- # doc = REXML::Document.new("<a attr='1'><b/><c/></a>")
|
|
- # doc.root["attr"] # => "1"
|
|
- # doc.root.attributes["attr"] # => "1"
|
|
- # doc.root[1] # => <c/>
|
|
def [](name_or_index)
|
|
case name_or_index
|
|
when String
|
|
@@ -575,17 +1245,42 @@ module REXML
|
|
end
|
|
end
|
|
|
|
+
|
|
+ # :call-seq:
|
|
+ # attribute(name, namespace = nil)
|
|
+ #
|
|
+ # Returns the string value for the given attribute name.
|
|
+ #
|
|
+ # With only argument +name+ given,
|
|
+ # returns the value of the named attribute if it exists, otherwise +nil+:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns="ns0">
|
|
+ # <a xmlns="ns1" attr="value"></a>
|
|
+ # <b xmlns="ns2" attr="value"></b>
|
|
+ # <c attr="value"/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # root = d.root
|
|
+ # a = root[1] # => <a xmlns='ns1' attr='value'/>
|
|
+ # a.attribute('attr') # => attr='value'
|
|
+ # a.attribute('nope') # => nil
|
|
+ #
|
|
+ # With arguments +name+ and +namespace+ given,
|
|
+ # returns the value of the named attribute if it exists, otherwise +nil+:
|
|
+ #
|
|
+ # xml_string = "<root xmlns:a='a' a:x='a:x' x='x'/>"
|
|
+ # document = REXML::Document.new(xml_string)
|
|
+ # document.root.attribute("x") # => x='x'
|
|
+ # document.root.attribute("x", "a") # => a:x='a:x'
|
|
+ #
|
|
def attribute( name, namespace=nil )
|
|
- prefix = nil
|
|
- if namespaces.respond_to? :key
|
|
- prefix = namespaces.key(namespace) if namespace
|
|
- else
|
|
- prefix = namespaces.index(namespace) if namespace
|
|
- end
|
|
+ prefix = namespaces.key(namespace) if namespace
|
|
prefix = nil if prefix == 'xmlns'
|
|
|
|
ret_val =
|
|
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
|
|
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
|
|
|
return ret_val unless ret_val.nil?
|
|
return nil if prefix.nil?
|
|
@@ -598,29 +1293,46 @@ module REXML
|
|
|
|
end
|
|
|
|
- # Evaluates to +true+ if this element has any attributes set, false
|
|
- # otherwise.
|
|
+ # :call-seq:
|
|
+ # has_attributes? -> true or false
|
|
+ #
|
|
+ # Returns +true+ if the element has attributes, +false+ otherwise:
|
|
+ #
|
|
+ # d = REXML::Document.new('<root><a attr="val"/><b/></root>')
|
|
+ # a, b = *d.root
|
|
+ # a.has_attributes? # => true
|
|
+ # b.has_attributes? # => false
|
|
+ #
|
|
def has_attributes?
|
|
return !@attributes.empty?
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # add_attribute(name, value) -> value
|
|
+ # add_attribute(attribute) -> attribute
|
|
+ #
|
|
# Adds an attribute to this element, overwriting any existing attribute
|
|
# by the same name.
|
|
- # key::
|
|
- # can be either an Attribute or a String. If an Attribute,
|
|
- # the attribute is added to the list of Element attributes. If String,
|
|
- # the argument is used as the name of the new attribute, and the value
|
|
- # parameter must be supplied.
|
|
- # value::
|
|
- # Required if +key+ is a String, and ignored if the first argument is
|
|
- # an Attribute. This is a String, and is used as the value
|
|
- # of the new Attribute. This should be the unnormalized value of the
|
|
- # attribute (without entities).
|
|
- # Returns:: the Attribute added
|
|
- # e = Element.new 'e'
|
|
- # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
|
|
- # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
|
|
- # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
|
|
+ #
|
|
+ # With string argument +name+ and object +value+ given,
|
|
+ # adds the attribute created with that name and value:
|
|
+ #
|
|
+ # e = REXML::Element.new
|
|
+ # e.add_attribute('attr', 'value') # => "value"
|
|
+ # e['attr'] # => "value"
|
|
+ # e.add_attribute('attr', 'VALUE') # => "VALUE"
|
|
+ # e['attr'] # => "VALUE"
|
|
+ #
|
|
+ # With only attribute object +attribute+ given,
|
|
+ # adds the given attribute:
|
|
+ #
|
|
+ # a = REXML::Attribute.new('attr', 'value')
|
|
+ # e.add_attribute(a) # => attr='value'
|
|
+ # e['attr'] # => "value"
|
|
+ # a = REXML::Attribute.new('attr', 'VALUE')
|
|
+ # e.add_attribute(a) # => attr='VALUE'
|
|
+ # e['attr'] # => "VALUE"
|
|
+ #
|
|
def add_attribute( key, value=nil )
|
|
if key.kind_of? Attribute
|
|
@attributes << key
|
|
@@ -629,10 +1341,29 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Add multiple attributes to this element.
|
|
- # hash:: is either a hash, or array of arrays
|
|
- # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
|
|
- # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
|
|
+ # :call-seq:
|
|
+ # add_attributes(hash) -> hash
|
|
+ # add_attributes(array)
|
|
+ #
|
|
+ # Adds zero or more attributes to the element;
|
|
+ # returns the argument.
|
|
+ #
|
|
+ # If hash argument +hash+ is given,
|
|
+ # each key must be a string;
|
|
+ # adds each attribute created with the key/value pair:
|
|
+ #
|
|
+ # e = REXML::Element.new
|
|
+ # h = {'foo' => 'bar', 'baz' => 'bat'}
|
|
+ # e.add_attributes(h)
|
|
+ #
|
|
+ # If argument +array+ is given,
|
|
+ # each array member must be a 2-element array <tt>[name, value]</tt>;
|
|
+ # each name must be a string:
|
|
+ #
|
|
+ # e = REXML::Element.new
|
|
+ # a = [['foo', 'bar'], ['baz', 'bat']]
|
|
+ # e.add_attributes(a)
|
|
+ #
|
|
def add_attributes hash
|
|
if hash.kind_of? Hash
|
|
hash.each_pair {|key, value| @attributes[key] = value }
|
|
@@ -641,19 +1372,17 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Removes an attribute
|
|
- # key::
|
|
- # either an Attribute or a String. In either case, the
|
|
- # attribute is found by matching the attribute name to the argument,
|
|
- # and then removed. If no attribute is found, no action is taken.
|
|
- # Returns::
|
|
- # the attribute removed, or nil if this Element did not contain
|
|
- # a matching attribute
|
|
- # e = Element.new('E')
|
|
- # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
|
|
- # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
|
|
- # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
|
|
- # e.delete_attribute( r ) #-> <E/>
|
|
+ # :call-seq:
|
|
+ # delete_attribute(name) -> removed_attribute or nil
|
|
+ #
|
|
+ # Removes a named attribute if it exists;
|
|
+ # returns the removed attribute if found, otherwise +nil+:
|
|
+ #
|
|
+ # e = REXML::Element.new('foo')
|
|
+ # e.add_attribute('bar', 'baz')
|
|
+ # e.delete_attribute('bar') # => <bar/>
|
|
+ # e.delete_attribute('bar') # => nil
|
|
+ #
|
|
def delete_attribute(key)
|
|
attr = @attributes.get_attribute(key)
|
|
attr.remove unless attr.nil?
|
|
@@ -663,26 +1392,80 @@ module REXML
|
|
# Other Utilities #
|
|
#################################################
|
|
|
|
- # Get an array of all CData children.
|
|
- # IMMUTABLE
|
|
+ # :call-seq:
|
|
+ # cdatas -> array_of_cdata_children
|
|
+ #
|
|
+ # Returns a frozen array of the REXML::CData children of the element:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <![CDATA[foo]]>
|
|
+ # <![CDATA[bar]]>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # cds = d.root.cdatas # => ["foo", "bar"]
|
|
+ # cds.frozen? # => true
|
|
+ # cds.map {|cd| cd.class } # => [REXML::CData, REXML::CData]
|
|
+ #
|
|
def cdatas
|
|
find_all { |child| child.kind_of? CData }.freeze
|
|
end
|
|
|
|
- # Get an array of all Comment children.
|
|
- # IMMUTABLE
|
|
+ # :call-seq:
|
|
+ # comments -> array_of_comment_children
|
|
+ #
|
|
+ # Returns a frozen array of the REXML::Comment children of the element:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <!--foo-->
|
|
+ # <!--bar-->
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # cs = d.root.comments
|
|
+ # cs.frozen? # => true
|
|
+ # cs.map {|c| c.class } # => [REXML::Comment, REXML::Comment]
|
|
+ # cs.map {|c| c.to_s } # => ["foo", "bar"]
|
|
+ #
|
|
def comments
|
|
find_all { |child| child.kind_of? Comment }.freeze
|
|
end
|
|
|
|
- # Get an array of all Instruction children.
|
|
- # IMMUTABLE
|
|
+ # :call-seq:
|
|
+ # instructions -> array_of_instruction_children
|
|
+ #
|
|
+ # Returns a frozen array of the REXML::Instruction children of the element:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root>
|
|
+ # <?target0 foo?>
|
|
+ # <?target1 bar?>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # is = d.root.instructions
|
|
+ # is.frozen? # => true
|
|
+ # is.map {|i| i.class } # => [REXML::Instruction, REXML::Instruction]
|
|
+ # is.map {|i| i.to_s } # => ["<?target0 foo?>", "<?target1 bar?>"]
|
|
+ #
|
|
def instructions
|
|
find_all { |child| child.kind_of? Instruction }.freeze
|
|
end
|
|
|
|
- # Get an array of all Text children.
|
|
- # IMMUTABLE
|
|
+ # :call-seq:
|
|
+ # texts -> array_of_text_children
|
|
+ #
|
|
+ # Returns a frozen array of the REXML::Text children of the element:
|
|
+ #
|
|
+ # xml_string = '<root><a/>text<b/>more<c/></root>'
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ts = d.root.texts
|
|
+ # ts.frozen? # => true
|
|
+ # ts.map {|t| t.class } # => [REXML::Text, REXML::Text]
|
|
+ # ts.map {|t| t.to_s } # => ["text", "more"]
|
|
+ #
|
|
def texts
|
|
find_all { |child| child.kind_of? Text }.freeze
|
|
end
|
|
@@ -713,7 +1496,7 @@ module REXML
|
|
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
|
|
formatter = if indent > -1
|
|
if transitive
|
|
- require "rexml/formatters/transitive"
|
|
+ require_relative "formatters/transitive"
|
|
REXML::Formatters::Transitive.new( indent, ie_hack )
|
|
else
|
|
REXML::Formatters::Pretty.new( indent, ie_hack )
|
|
@@ -758,35 +1541,129 @@ module REXML
|
|
# XPath search support. You are expected to only encounter this class as
|
|
# the <tt>element.elements</tt> object. Therefore, you are
|
|
# _not_ expected to instantiate this yourself.
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <?xml version="1.0" encoding="UTF-8"?>
|
|
+ # <bookstore>
|
|
+ # <book category="cooking">
|
|
+ # <title lang="en">Everyday Italian</title>
|
|
+ # <author>Giada De Laurentiis</author>
|
|
+ # <year>2005</year>
|
|
+ # <price>30.00</price>
|
|
+ # </book>
|
|
+ # <book category="children">
|
|
+ # <title lang="en">Harry Potter</title>
|
|
+ # <author>J K. Rowling</author>
|
|
+ # <year>2005</year>
|
|
+ # <price>29.99</price>
|
|
+ # </book>
|
|
+ # <book category="web">
|
|
+ # <title lang="en">XQuery Kick Start</title>
|
|
+ # <author>James McGovern</author>
|
|
+ # <author>Per Bothner</author>
|
|
+ # <author>Kurt Cagle</author>
|
|
+ # <author>James Linn</author>
|
|
+ # <author>Vaidyanathan Nagarajan</author>
|
|
+ # <year>2003</year>
|
|
+ # <price>49.99</price>
|
|
+ # </book>
|
|
+ # <book category="web" cover="paperback">
|
|
+ # <title lang="en">Learning XML</title>
|
|
+ # <author>Erik T. Ray</author>
|
|
+ # <year>2003</year>
|
|
+ # <price>39.95</price>
|
|
+ # </book>
|
|
+ # </bookstore>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ #
|
|
class Elements
|
|
include Enumerable
|
|
- # Constructor
|
|
- # parent:: the parent Element
|
|
+ # :call-seq:
|
|
+ # new(parent) -> new_elements_object
|
|
+ #
|
|
+ # Returns a new \Elements object with the given +parent+.
|
|
+ # Does _not_ assign <tt>parent.elements = self</tt>:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # eles = REXML::Elements.new(d.root)
|
|
+ # eles # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles == d.root.elements # => false
|
|
+ #
|
|
def initialize parent
|
|
@element = parent
|
|
end
|
|
|
|
- # Fetches a child element. Filters only Element children, regardless of
|
|
- # the XPath match.
|
|
- # index::
|
|
- # the search parameter. This is either an Integer, which
|
|
- # will be used to find the index'th child Element, or an XPath,
|
|
- # which will be used to search for the Element. <em>Because
|
|
- # of the nature of XPath searches, any element in the connected XML
|
|
- # document can be fetched through any other element.</em> <b>The
|
|
- # Integer index is 1-based, not 0-based.</b> This means that the first
|
|
- # child element is at index 1, not 0, and the +n+th element is at index
|
|
- # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
|
|
- # starting from 1, not 0, and the indexes should be the same.
|
|
- # name::
|
|
- # optional, and only used in the first argument is an
|
|
- # Integer. In that case, the index'th child Element that has the
|
|
- # supplied name will be returned. Note again that the indexes start at 1.
|
|
- # Returns:: the first matching Element, or nil if no child matched
|
|
- # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
|
|
- # doc.root.elements[1] #-> <b/>
|
|
- # doc.root.elements['c'] #-> <c id="1"/>
|
|
- # doc.root.elements[2,'c'] #-> <c id="2"/>
|
|
+ # :call-seq:
|
|
+ # parent
|
|
+ #
|
|
+ # Returns the parent element cited in creating the \Elements object.
|
|
+ # This element is also the default starting point for searching
|
|
+ # in the \Elements object.
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = REXML::Elements.new(d.root)
|
|
+ # elements.parent == d.root # => true
|
|
+ #
|
|
+ def parent
|
|
+ @element
|
|
+ end
|
|
+
|
|
+ # :call-seq:
|
|
+ # elements[index] -> element or nil
|
|
+ # elements[xpath] -> element or nil
|
|
+ # elements[n, name] -> element or nil
|
|
+ #
|
|
+ # Returns the first \Element object selected by the arguments,
|
|
+ # if any found, or +nil+ if none found.
|
|
+ #
|
|
+ # Notes:
|
|
+ # - The +index+ is 1-based, not 0-based, so that:
|
|
+ # - The first element has index <tt>1</tt>
|
|
+ # - The _nth_ element has index +n+.
|
|
+ # - The selection ignores non-\Element nodes.
|
|
+ #
|
|
+ # When the single argument +index+ is given,
|
|
+ # returns the element given by the index, if any; otherwise, +nil+:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # eles = d.root.elements
|
|
+ # eles # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles[1] # => <book category='cooking'> ... </>
|
|
+ # eles.size # => 4
|
|
+ # eles[4] # => <book category='web' cover='paperback'> ... </>
|
|
+ # eles[5] # => nil
|
|
+ #
|
|
+ # The node at this index is not an \Element, and so is not returned:
|
|
+ #
|
|
+ # eles = d.root.first.first # => <title lang='en'> ... </>
|
|
+ # eles.to_a # => ["Everyday Italian"]
|
|
+ # eles[1] # => nil
|
|
+ #
|
|
+ # When the single argument +xpath+ is given,
|
|
+ # returns the first element found via that +xpath+, if any; otherwise, +nil+:
|
|
+ #
|
|
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles['/bookstore'] # => <bookstore> ... </>
|
|
+ # eles['//book'] # => <book category='cooking'> ... </>
|
|
+ # eles['//book [@category="children"]'] # => <book category='children'> ... </>
|
|
+ # eles['/nosuch'] # => nil
|
|
+ # eles['//nosuch'] # => nil
|
|
+ # eles['//book [@category="nosuch"]'] # => nil
|
|
+ # eles['.'] # => <bookstore> ... </>
|
|
+ # eles['..'].class # => REXML::Document
|
|
+ #
|
|
+ # With arguments +n+ and +name+ given,
|
|
+ # returns the _nth_ found element that has the given +name+,
|
|
+ # or +nil+ if there is no such _nth_ element:
|
|
+ #
|
|
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles[1, 'book'] # => <book category='cooking'> ... </>
|
|
+ # eles[4, 'book'] # => <book category='web' cover='paperback'> ... </>
|
|
+ # eles[5, 'book'] # => nil
|
|
+ #
|
|
def []( index, name=nil)
|
|
if index.kind_of? Integer
|
|
raise "index (#{index}) must be >= 1" if index < 1
|
|
@@ -806,19 +1683,42 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Sets an element, replacing any previous matching element. If no
|
|
- # existing element is found ,the element is added.
|
|
- # index:: Used to find a matching element to replace. See []().
|
|
- # element::
|
|
- # The element to replace the existing element with
|
|
- # the previous element
|
|
- # Returns:: nil if no previous element was found.
|
|
+ # :call-seq:
|
|
+ # elements[] = index, replacement_element -> replacement_element or nil
|
|
+ #
|
|
+ # Replaces or adds an element.
|
|
+ #
|
|
+ # When <tt>eles[index]</tt> exists, replaces it with +replacement_element+
|
|
+ # and returns +replacement_element+:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles[1] # => <book category='cooking'> ... </>
|
|
+ # eles[1] = REXML::Element.new('foo')
|
|
+ # eles[1] # => <foo/>
|
|
+ #
|
|
+ # Does nothing (or raises an exception)
|
|
+ # if +replacement_element+ is not an \Element:
|
|
+ # eles[2] # => <book category='web' cover='paperback'> ... </>
|
|
+ # eles[2] = REXML::Text.new('bar')
|
|
+ # eles[2] # => <book category='web' cover='paperback'> ... </>
|
|
+ #
|
|
+ # When <tt>eles[index]</tt> does not exist,
|
|
+ # adds +replacement_element+ to the element:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
|
|
+ # eles.size # => 4
|
|
+ # eles[50] = REXML::Element.new('foo') # => <foo/>
|
|
+ # eles.size # => 5
|
|
+ # eles[5] # => <foo/>
|
|
+ #
|
|
+ # Does nothing (or raises an exception)
|
|
+ # if +replacement_element+ is not an \Element:
|
|
+ #
|
|
+ # eles[50] = REXML::Text.new('bar') # => "bar"
|
|
+ # eles.size # => 5
|
|
#
|
|
- # doc = Document.new '<a/>'
|
|
- # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
|
|
- # doc.root.elements[1] #-> <b/>
|
|
- # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
|
|
- # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
|
|
def []=( index, element )
|
|
previous = self[index]
|
|
if previous.nil?
|
|
@@ -829,14 +1729,34 @@ module REXML
|
|
return previous
|
|
end
|
|
|
|
- # Returns +true+ if there are no +Element+ children, +false+ otherwise
|
|
+ # :call-seq:
|
|
+ # empty? -> true or false
|
|
+ #
|
|
+ # Returns +true+ if there are no \Element children, +false+ otherwise.
|
|
+ #
|
|
+ # d = REXML::Document.new('')
|
|
+ # d.elements.empty? # => true
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.elements.empty? # => false
|
|
+ #
|
|
def empty?
|
|
@element.find{ |child| child.kind_of? Element}.nil?
|
|
end
|
|
|
|
- # Returns the index of the supplied child (starting at 1), or -1 if
|
|
- # the element is not a child
|
|
- # element:: an +Element+ child
|
|
+ # :call-seq:
|
|
+ # index(element)
|
|
+ #
|
|
+ # Returns the 1-based index of the given +element+, if found;
|
|
+ # otherwise, returns -1:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # ele_1, ele_2, ele_3, ele_4 = *elements
|
|
+ # elements.index(ele_4) # => 4
|
|
+ # elements.delete(ele_3)
|
|
+ # elements.index(ele_4) # => 3
|
|
+ # elements.index(ele_3) # => -1
|
|
+ #
|
|
def index element
|
|
rv = 0
|
|
found = @element.find do |child|
|
|
@@ -848,17 +1768,47 @@ module REXML
|
|
return -1
|
|
end
|
|
|
|
- # Deletes a child Element
|
|
- # element::
|
|
- # Either an Element, which is removed directly; an
|
|
- # xpath, where the first matching child is removed; or an Integer,
|
|
- # where the n'th Element is removed.
|
|
- # Returns:: the removed child
|
|
- # doc = Document.new '<a><b/><c/><c id="1"/></a>'
|
|
- # b = doc.root.elements[1]
|
|
- # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
|
|
- # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
|
|
- # doc.root.elements.delete 1 #-> <a/>
|
|
+ # :call-seq:
|
|
+ # delete(index) -> removed_element or nil
|
|
+ # delete(element) -> removed_element or nil
|
|
+ # delete(xpath) -> removed_element or nil
|
|
+ #
|
|
+ # Removes an element; returns the removed element, or +nil+ if none removed.
|
|
+ #
|
|
+ # With integer argument +index+ given,
|
|
+ # removes the child element at that offset:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.size # => 4
|
|
+ # elements[2] # => <book category='children'> ... </>
|
|
+ # elements.delete(2) # => <book category='children'> ... </>
|
|
+ # elements.size # => 3
|
|
+ # elements[2] # => <book category='web'> ... </>
|
|
+ # elements.delete(50) # => nil
|
|
+ #
|
|
+ # With element argument +element+ given,
|
|
+ # removes that child element:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # ele_1, ele_2, ele_3, ele_4 = *elements
|
|
+ # elements.size # => 4
|
|
+ # elements[2] # => <book category='children'> ... </>
|
|
+ # elements.delete(ele_2) # => <book category='children'> ... </>
|
|
+ # elements.size # => 3
|
|
+ # elements[2] # => <book category='web'> ... </>
|
|
+ # elements.delete(ele_2) # => nil
|
|
+ #
|
|
+ # With string argument +xpath+ given,
|
|
+ # removes the first element found via that xpath:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.delete('//book') # => <book category='cooking'> ... </>
|
|
+ # elements.delete('//book [@category="children"]') # => <book category='children'> ... </>
|
|
+ # elements.delete('//nosuch') # => nil
|
|
+ #
|
|
def delete element
|
|
if element.kind_of? Element
|
|
@element.delete element
|
|
@@ -868,12 +1818,23 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Removes multiple elements. Filters for Element children, regardless of
|
|
- # XPath matching.
|
|
- # xpath:: all elements matching this String path are removed.
|
|
- # Returns:: an Array of Elements that have been removed
|
|
- # doc = Document.new '<a><c/><c/><c/><c/></a>'
|
|
- # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
|
|
+ # :call-seq:
|
|
+ # delete_all(xpath)
|
|
+ #
|
|
+ # Removes all elements found via the given +xpath+;
|
|
+ # returns the array of removed elements (empty if none were removed).
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.size # => 4
|
|
+ # deleted_elements = elements.delete_all('//book [@category="web"]')
|
|
+ # deleted_elements.size # => 2
|
|
+ # elements.size # => 2
|
|
+ # deleted_elements = elements.delete_all('//book')
|
|
+ # deleted_elements.size # => 2
|
|
+ # elements.size # => 0
|
|
+ # elements.delete_all('//book') # => []
|
|
+ #
|
|
def delete_all( xpath )
|
|
rv = []
|
|
XPath::each( @element, xpath) {|element|
|
|
@@ -886,15 +1847,68 @@ module REXML
|
|
return rv
|
|
end
|
|
|
|
- # Adds an element
|
|
- # element::
|
|
- # if supplied, is either an Element, String, or
|
|
- # Source (see Element.initialize). If not supplied or nil, a
|
|
- # new, default Element will be constructed
|
|
- # Returns:: the added Element
|
|
- # a = Element.new('a')
|
|
- # a.elements.add(Element.new('b')) #-> <a><b/></a>
|
|
- # a.elements.add('c') #-> <a><b/><c/></a>
|
|
+ # :call-seq:
|
|
+ # add -> new_element
|
|
+ # add(name) -> new_element
|
|
+ # add(element) -> element
|
|
+ #
|
|
+ # Adds an element; returns the element added.
|
|
+ #
|
|
+ # With no argument, creates and adds a new element.
|
|
+ # The new element has:
|
|
+ #
|
|
+ # - No name.
|
|
+ # - \Parent from the \Elements object.
|
|
+ # - Context from that parent.
|
|
+ #
|
|
+ # Example:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # parent = elements.parent # => <bookstore> ... </>
|
|
+ # parent.context = {raw: :all}
|
|
+ # elements.size # => 4
|
|
+ # new_element = elements.add # => </>
|
|
+ # elements.size # => 5
|
|
+ # new_element.name # => nil
|
|
+ # new_element.parent # => <bookstore> ... </>
|
|
+ # new_element.context # => {:raw=>:all}
|
|
+ #
|
|
+ # With string argument +name+, creates and adds a new element.
|
|
+ # The new element has:
|
|
+ #
|
|
+ # - Name +name+.
|
|
+ # - \Parent from the \Elements object.
|
|
+ # - Context from that parent.
|
|
+ #
|
|
+ # Example:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # parent = elements.parent # => <bookstore> ... </>
|
|
+ # parent.context = {raw: :all}
|
|
+ # elements.size # => 4
|
|
+ # new_element = elements.add('foo') # => <foo/>
|
|
+ # elements.size # => 5
|
|
+ # new_element.name # => "foo"
|
|
+ # new_element.parent # => <bookstore> ... </>
|
|
+ # new_element.context # => {:raw=>:all}
|
|
+ #
|
|
+ # With argument +element+,
|
|
+ # creates and adds a clone of the given +element+.
|
|
+ # The new element has name, parent, and context from the given +element+.
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.size # => 4
|
|
+ # e0 = REXML::Element.new('foo')
|
|
+ # e1 = REXML::Element.new('bar', e0, {raw: :all})
|
|
+ # element = elements.add(e1) # => <bar/>
|
|
+ # elements.size # => 5
|
|
+ # element.name # => "bar"
|
|
+ # element.parent # => <bookstore> ... </>
|
|
+ # element.context # => {:raw=>:all}
|
|
+ #
|
|
def add element=nil
|
|
if element.nil?
|
|
Element.new("", self, @element.context)
|
|
@@ -909,24 +1923,55 @@ module REXML
|
|
|
|
alias :<< :add
|
|
|
|
- # Iterates through all of the child Elements, optionally filtering
|
|
- # them by a given XPath
|
|
- # xpath::
|
|
- # optional. If supplied, this is a String XPath, and is used to
|
|
- # filter the children, so that only matching children are yielded. Note
|
|
- # that XPaths are automatically filtered for Elements, so that
|
|
- # non-Element children will not be yielded
|
|
- # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
|
|
- # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements
|
|
- # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements
|
|
- # doc.root.elements.each('child::node()') {|e|p e}
|
|
- # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
|
|
- # XPath.each(doc.root, 'child::node()', &block)
|
|
- # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
|
|
+ # :call-seq:
|
|
+ # each(xpath = nil) {|element| ... } -> self
|
|
+ #
|
|
+ # Iterates over the elements.
|
|
+ #
|
|
+ # With no argument, calls the block with each element:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.each {|element| p element }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <book category='cooking'> ... </>
|
|
+ # <book category='children'> ... </>
|
|
+ # <book category='web'> ... </>
|
|
+ # <book category='web' cover='paperback'> ... </>
|
|
+ #
|
|
+ # With argument +xpath+, calls the block with each element
|
|
+ # that matches the given +xpath+:
|
|
+ #
|
|
+ # elements.each('//book [@category="web"]') {|element| p element }
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # <book category='web'> ... </>
|
|
+ # <book category='web' cover='paperback'> ... </>
|
|
+ #
|
|
def each( xpath=nil )
|
|
XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # collect(xpath = nil) {|element| ... } -> array
|
|
+ #
|
|
+ # Iterates over the elements; returns the array of block return values.
|
|
+ #
|
|
+ # With no argument, iterates over all elements:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.collect {|element| element.size } # => [9, 9, 17, 9]
|
|
+ #
|
|
+ # With argument +xpath+, iterates over elements that match
|
|
+ # the given +xpath+:
|
|
+ #
|
|
+ # xpath = '//book [@category="web"]'
|
|
+ # elements.collect(xpath) {|element| element.size } # => [17, 9]
|
|
+ #
|
|
def collect( xpath=nil )
|
|
collection = []
|
|
XPath::each( @element, xpath ) {|e|
|
|
@@ -935,6 +1980,83 @@ module REXML
|
|
collection
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # inject(xpath = nil, initial = nil) -> object
|
|
+ #
|
|
+ # Calls the block with elements; returns the last block return value.
|
|
+ #
|
|
+ # With no argument, iterates over the elements, calling the block
|
|
+ # <tt>elements.size - 1</tt> times.
|
|
+ #
|
|
+ # - The first call passes the first and second elements.
|
|
+ # - The second call passes the first block return value and the third element.
|
|
+ # - The third call passes the second block return value and the fourth element.
|
|
+ # - And so on.
|
|
+ #
|
|
+ # In this example, the block returns the passed element,
|
|
+ # which is then the object argument to the next call:
|
|
+ #
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # elements = d.root.elements
|
|
+ # elements.inject do |object, element|
|
|
+ # p [elements.index(object), elements.index(element)]
|
|
+ # element
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # [1, 2]
|
|
+ # [2, 3]
|
|
+ # [3, 4]
|
|
+ #
|
|
+ # With the single argument +xpath+, calls the block only with
|
|
+ # elements matching that xpath:
|
|
+ #
|
|
+ # elements.inject('//book [@category="web"]') do |object, element|
|
|
+ # p [elements.index(object), elements.index(element)]
|
|
+ # element
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # [3, 4]
|
|
+ #
|
|
+ # With argument +xpath+ given as +nil+
|
|
+ # and argument +initial+ also given,
|
|
+ # calls the block once for each element.
|
|
+ #
|
|
+ # - The first call passes the +initial+ and the first element.
|
|
+ # - The second call passes the first block return value and the second element.
|
|
+ # - The third call passes the second block return value and the third element.
|
|
+ # - And so on.
|
|
+ #
|
|
+ # In this example, the first object index is <tt>-1</tt>:
|
|
+ #
|
|
+ # elements.inject(nil, 'Initial') do |object, element|
|
|
+ # p [elements.index(object), elements.index(element)]
|
|
+ # element
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # [-1, 1]
|
|
+ # [1, 2]
|
|
+ # [2, 3]
|
|
+ # [3, 4]
|
|
+ #
|
|
+ # In this form the passed object can be used as an accumulator:
|
|
+ #
|
|
+ # elements.inject(nil, 0) do |total, element|
|
|
+ # total += element.size
|
|
+ # end # => 44
|
|
+ #
|
|
+ # With both arguments +xpath+ and +initial+ given,
|
|
+ # calls the block only with elements matching that xpath:
|
|
+ #
|
|
+ # elements.inject('//book [@category="web"]', 0) do |total, element|
|
|
+ # total += element.size
|
|
+ # end # => 26
|
|
+ #
|
|
def inject( xpath=nil, initial=nil )
|
|
first = true
|
|
XPath::each( @element, xpath ) {|e|
|
|
@@ -950,23 +2072,39 @@ module REXML
|
|
initial
|
|
end
|
|
|
|
- # Returns the number of +Element+ children of the parent object.
|
|
- # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
|
|
- # doc.root.size #-> 6, 3 element and 3 text nodes
|
|
- # doc.root.elements.size #-> 3
|
|
+ # :call-seq:
|
|
+ # size -> integer
|
|
+ #
|
|
+ # Returns the count of \Element children:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
|
|
+ # d.root.elements.size # => 3 # Three elements.
|
|
+ # d.root.size # => 6 # Three elements plus three text nodes.
|
|
+ #
|
|
def size
|
|
count = 0
|
|
@element.each {|child| count+=1 if child.kind_of? Element }
|
|
count
|
|
end
|
|
|
|
- # Returns an Array of Element children. An XPath may be supplied to
|
|
- # filter the children. Only Element children are returned, even if the
|
|
- # supplied XPath matches non-Element children.
|
|
- # doc = Document.new '<a>sean<b/>elliott<c/></a>'
|
|
- # doc.root.elements.to_a #-> [ <b/>, <c/> ]
|
|
- # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
|
|
- # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
|
|
+ # :call-seq:
|
|
+ # to_a(xpath = nil) -> array_of_elements
|
|
+ #
|
|
+ # Returns an array of element children (not including non-element children).
|
|
+ #
|
|
+ # With no argument, returns an array of all element children:
|
|
+ #
|
|
+ # d = REXML::Document.new '<a>sean<b/>elliott<c/></a>'
|
|
+ # elements = d.root.elements
|
|
+ # elements.to_a # => [<b/>, <c/>] # Omits non-element children.
|
|
+ # children = d.root.children
|
|
+ # children # => ["sean", <b/>, "elliott", <c/>] # Includes non-element children.
|
|
+ #
|
|
+ # With argument +xpath+, returns an array of element children
|
|
+ # that match the xpath:
|
|
+ #
|
|
+ # elements.to_a('//c') # => [<c/>]
|
|
+ #
|
|
def to_a( xpath=nil )
|
|
rv = XPath.match( @element, xpath )
|
|
return rv.find_all{|e| e.kind_of? Element} if xpath
|
|
@@ -988,36 +2126,89 @@ module REXML
|
|
# A class that defines the set of Attributes of an Element and provides
|
|
# operations for accessing elements in that set.
|
|
class Attributes < Hash
|
|
- # Constructor
|
|
- # element:: the Element of which this is an Attribute
|
|
+
|
|
+ # :call-seq:
|
|
+ # new(element)
|
|
+ #
|
|
+ # Creates and returns a new \REXML::Attributes object.
|
|
+ # The element given by argument +element+ is stored,
|
|
+ # but its own attributes are not modified:
|
|
+ #
|
|
+ # ele = REXML::Element.new('foo')
|
|
+ # attrs = REXML::Attributes.new(ele)
|
|
+ # attrs.object_id == ele.attributes.object_id # => false
|
|
+ #
|
|
+ # Other instance methods in class \REXML::Attributes may refer to:
|
|
+ #
|
|
+ # - +element.document+.
|
|
+ # - +element.prefix+.
|
|
+ # - +element.expanded_name+.
|
|
+ #
|
|
def initialize element
|
|
@element = element
|
|
end
|
|
|
|
- # Fetches an attribute value. If you want to get the Attribute itself,
|
|
- # use get_attribute()
|
|
- # name:: an XPath attribute name. Namespaces are relevant here.
|
|
- # Returns::
|
|
- # the String value of the matching attribute, or +nil+ if no
|
|
- # matching attribute was found. This is the unnormalized value
|
|
- # (with entities expanded).
|
|
+ # :call-seq:
|
|
+ # [name] -> attribute_value or nil
|
|
+ #
|
|
+ # Returns the value for the attribute given by +name+,
|
|
+ # if it exists; otherwise +nil+.
|
|
+ # The value returned is the unnormalized attribute value,
|
|
+ # with entities expanded:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # ele.attributes['att'] # => "<"
|
|
+ # ele.attributes['bar:att'] # => "2"
|
|
+ # ele.attributes['nosuch'] # => nil
|
|
+ #
|
|
+ # Related: get_attribute (returns an \Attribute object).
|
|
#
|
|
- # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>"
|
|
- # doc.root.attributes['att'] #-> '<'
|
|
- # doc.root.attributes['bar:att'] #-> '2'
|
|
def [](name)
|
|
attr = get_attribute(name)
|
|
return attr.value unless attr.nil?
|
|
return nil
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # to_a -> array_of_attribute_objects
|
|
+ #
|
|
+ # Returns an array of \REXML::Attribute objects representing
|
|
+ # the attributes:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes.to_a # => [foo:att='1', bar:att='2', att='<']
|
|
+ # attrs.first.class # => REXML::Attribute
|
|
+ #
|
|
def to_a
|
|
enum_for(:each_attribute).to_a
|
|
end
|
|
|
|
- # Returns the number of attributes the owning Element contains.
|
|
- # doc = Document "<a x='1' y='2' foo:x='3'/>"
|
|
- # doc.root.attributes.length #-> 3
|
|
+ # :call-seq:
|
|
+ # length
|
|
+ #
|
|
+ # Returns the count of attributes:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # ele.attributes.length # => 3
|
|
+ #
|
|
def length
|
|
c = 0
|
|
each_attribute { c+=1 }
|
|
@@ -1025,14 +2216,30 @@ module REXML
|
|
end
|
|
alias :size :length
|
|
|
|
- # Iterates over the attributes of an Element. Yields actual Attribute
|
|
- # nodes, not String values.
|
|
+ # :call-seq:
|
|
+ # each_attribute {|attr| ... }
|
|
+ #
|
|
+ # Calls the given block with each \REXML::Attribute object:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # ele.attributes.each_attribute do |attr|
|
|
+ # p [attr.class, attr]
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # [REXML::Attribute, foo:att='1']
|
|
+ # [REXML::Attribute, bar:att='2']
|
|
+ # [REXML::Attribute, att='<']
|
|
#
|
|
- # doc = Document.new '<a x="1" y="2"/>'
|
|
- # doc.root.attributes.each_attribute {|attr|
|
|
- # p attr.expanded_name+" => "+attr.value
|
|
- # }
|
|
def each_attribute # :yields: attribute
|
|
+ return to_enum(__method__) unless block_given?
|
|
each_value do |val|
|
|
if val.kind_of? Attribute
|
|
yield val
|
|
@@ -1042,26 +2249,54 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # Iterates over each attribute of an Element, yielding the expanded name
|
|
- # and value as a pair of Strings.
|
|
+ # :call-seq:
|
|
+ # each {|expanded_name, value| ... }
|
|
+ #
|
|
+ # Calls the given block with each expanded-name/value pair:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # ele.attributes.each do |expanded_name, value|
|
|
+ # p [expanded_name, value]
|
|
+ # end
|
|
+ #
|
|
+ # Output:
|
|
+ #
|
|
+ # ["foo:att", "1"]
|
|
+ # ["bar:att", "2"]
|
|
+ # ["att", "<"]
|
|
#
|
|
- # doc = Document.new '<a x="1" y="2"/>'
|
|
- # doc.root.attributes.each {|name, value| p name+" => "+value }
|
|
def each
|
|
+ return to_enum(__method__) unless block_given?
|
|
each_attribute do |attr|
|
|
yield [attr.expanded_name, attr.value]
|
|
end
|
|
end
|
|
|
|
- # Fetches an attribute
|
|
- # name::
|
|
- # the name by which to search for the attribute. Can be a
|
|
- # <tt>prefix:name</tt> namespace name.
|
|
- # Returns:: The first matching attribute, or nil if there was none. This
|
|
- # value is an Attribute node, not the String value of the attribute.
|
|
- # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
|
|
- # doc.root.attributes.get_attribute("foo").value #-> "2"
|
|
- # doc.root.attributes.get_attribute("x:foo").value #-> "1"
|
|
+ # :call-seq:
|
|
+ # get_attribute(name) -> attribute_object or nil
|
|
+ #
|
|
+ # Returns the \REXML::Attribute object for the given +name+:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs.get_attribute('foo:att') # => foo:att='1'
|
|
+ # attrs.get_attribute('foo:att').class # => REXML::Attribute
|
|
+ # attrs.get_attribute('bar:att') # => bar:att='2'
|
|
+ # attrs.get_attribute('att') # => att='<'
|
|
+ # attrs.get_attribute('nosuch') # => nil
|
|
+ #
|
|
def get_attribute( name )
|
|
attr = fetch( name, nil )
|
|
if attr.nil?
|
|
@@ -1095,18 +2330,29 @@ module REXML
|
|
return attr
|
|
end
|
|
|
|
- # Sets an attribute, overwriting any existing attribute value by the
|
|
- # same name. Namespace is significant.
|
|
- # name:: the name of the attribute
|
|
- # value::
|
|
- # (optional) If supplied, the value of the attribute. If
|
|
- # nil, any existing matching attribute is deleted.
|
|
- # Returns::
|
|
- # Owning element
|
|
- # doc = Document.new "<a x:foo='1' foo='3'/>"
|
|
- # doc.root.attributes['y:foo'] = '2'
|
|
- # doc.root.attributes['foo'] = '4'
|
|
- # doc.root.attributes['x:foo'] = nil
|
|
+ # :call-seq:
|
|
+ # [name] = value -> value
|
|
+ #
|
|
+ # When +value+ is non-+nil+,
|
|
+ # assigns that to the attribute for the given +name+,
|
|
+ # overwriting the previous value if it exists:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs['foo:att'] = '2' # => "2"
|
|
+ # attrs['baz:att'] = '3' # => "3"
|
|
+ #
|
|
+ # When +value+ is +nil+, deletes the attribute if it exists:
|
|
+ #
|
|
+ # attrs['baz:att'] = nil
|
|
+ # attrs.include?('baz:att') # => false
|
|
+ #
|
|
def []=( name, value )
|
|
if value.nil? # Delete the named attribute
|
|
attr = get_attribute(name)
|
|
@@ -1130,28 +2376,35 @@ module REXML
|
|
old_attr[value.prefix] = value
|
|
elsif old_attr.prefix != value.prefix
|
|
# Check for conflicting namespaces
|
|
- raise ParseException.new(
|
|
- "Namespace conflict in adding attribute \"#{value.name}\": "+
|
|
- "Prefix \"#{old_attr.prefix}\" = "+
|
|
- "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
|
|
- "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
|
|
- value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
|
|
- @element.namespace( old_attr.prefix ) ==
|
|
- @element.namespace( value.prefix )
|
|
- store value.name, { old_attr.prefix => old_attr,
|
|
- value.prefix => value }
|
|
+ if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
|
+ old_namespace = old_attr.namespace
|
|
+ new_namespace = value.namespace
|
|
+ if old_namespace == new_namespace
|
|
+ raise ParseException.new(
|
|
+ "Namespace conflict in adding attribute \"#{value.name}\": "+
|
|
+ "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
|
+ "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
|
+ end
|
|
+ end
|
|
+ store value.name, {old_attr.prefix => old_attr,
|
|
+ value.prefix => value}
|
|
else
|
|
store value.name, value
|
|
end
|
|
return @element
|
|
end
|
|
|
|
- # Returns an array of Strings containing all of the prefixes declared
|
|
- # by this set of # attributes. The array does not include the default
|
|
+ # :call-seq:
|
|
+ # prefixes -> array_of_prefix_strings
|
|
+ #
|
|
+ # Returns an array of prefix strings in the attributes.
|
|
+ # The array does not include the default
|
|
# namespace declaration, if one exists.
|
|
- # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
|
|
- # "z='glorp' p:k='gru'/>")
|
|
- # prefixes = doc.root.attributes.prefixes #-> ['x', 'y']
|
|
+ #
|
|
+ # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>'
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.root.attributes.prefixes # => ["x", "y"]
|
|
+ #
|
|
def prefixes
|
|
ns = []
|
|
each_attribute do |attribute|
|
|
@@ -1168,6 +2421,15 @@ module REXML
|
|
ns
|
|
end
|
|
|
|
+ # :call-seq:
|
|
+ # namespaces
|
|
+ #
|
|
+ # Returns a hash of name/value pairs for the namespaces:
|
|
+ #
|
|
+ # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>'
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"}
|
|
+ #
|
|
def namespaces
|
|
namespaces = {}
|
|
each_attribute do |attribute|
|
|
@@ -1184,16 +2446,34 @@ module REXML
|
|
namespaces
|
|
end
|
|
|
|
- # Removes an attribute
|
|
- # attribute::
|
|
- # either a String, which is the name of the attribute to remove --
|
|
- # namespaces are significant here -- or the attribute to remove.
|
|
- # Returns:: the owning element
|
|
- # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
|
|
- # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
|
|
- # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
|
|
- # attr = doc.root.attributes.get_attribute('y:foo')
|
|
- # doc.root.attributes.delete attr #-> <a z:foo='4'/>"
|
|
+ # :call-seq:
|
|
+ # delete(name) -> element
|
|
+ # delete(attribute) -> element
|
|
+ #
|
|
+ # Removes a specified attribute if it exists;
|
|
+ # returns the attributes' element.
|
|
+ #
|
|
+ # When string argument +name+ is given,
|
|
+ # removes the attribute of that name if it exists:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/>
|
|
+ # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/>
|
|
+ #
|
|
+ # When attribute argument +attribute+ is given,
|
|
+ # removes that attribute if it exists:
|
|
+ #
|
|
+ # attr = REXML::Attribute.new('bar:att', '2')
|
|
+ # attrs.delete(attr) # => <ele att='<'/> # => <ele att='<'/>
|
|
+ # attrs.delete(attr) # => <ele att='<'/> # => <ele/>
|
|
+ #
|
|
def delete( attribute )
|
|
name = nil
|
|
prefix = nil
|
|
@@ -1221,19 +2501,48 @@ module REXML
|
|
@element
|
|
end
|
|
|
|
- # Adds an attribute, overriding any existing attribute by the
|
|
- # same name. Namespaces are significant.
|
|
- # attribute:: An Attribute
|
|
+ # :call-seq:
|
|
+ # add(attribute) -> attribute
|
|
+ #
|
|
+ # Adds attribute +attribute+, replacing the previous
|
|
+ # attribute of the same name if it exists;
|
|
+ # returns +attribute+:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs # => {"att"=>{"foo"=>foo:att='1', "bar"=>bar:att='2', ""=>att='<'}}
|
|
+ # attrs.add(REXML::Attribute.new('foo:att', '2')) # => foo:att='2'
|
|
+ # attrs.add(REXML::Attribute.new('baz', '3')) # => baz='3'
|
|
+ # attrs.include?('baz') # => true
|
|
+ #
|
|
def add( attribute )
|
|
self[attribute.name] = attribute
|
|
end
|
|
|
|
alias :<< :add
|
|
|
|
- # Deletes all attributes matching a name. Namespaces are significant.
|
|
- # name::
|
|
- # A String; all attributes that match this path will be removed
|
|
- # Returns:: an Array of the Attributes that were removed
|
|
+ # :call-seq:
|
|
+ # delete_all(name) -> array_of_removed_attributes
|
|
+ #
|
|
+ # Removes all attributes matching the given +name+;
|
|
+ # returns an array of the removed attributes:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs.delete_all('att') # => [att='<']
|
|
+ #
|
|
def delete_all( name )
|
|
rv = []
|
|
each_attribute { |attribute|
|
|
@@ -1243,11 +2552,23 @@ module REXML
|
|
return rv
|
|
end
|
|
|
|
- # The +get_attribute_ns+ method retrieves a method by its namespace
|
|
- # and name. Thus it is possible to reliably identify an attribute
|
|
- # even if an XML processor has changed the prefix.
|
|
+ # :call-seq:
|
|
+ # get_attribute_ns(namespace, name)
|
|
+ #
|
|
+ # Returns the \REXML::Attribute object among the attributes
|
|
+ # that matches the given +namespace+ and +name+:
|
|
+ #
|
|
+ # xml_string = <<-EOT
|
|
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
|
|
+ # <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # </root>
|
|
+ # EOT
|
|
+ # d = REXML::Document.new(xml_string)
|
|
+ # ele = d.root.elements['//ele'] # => <ele foo:att='1' bar:att='2' att='<'/>
|
|
+ # attrs = ele.attributes
|
|
+ # attrs.get_attribute_ns('http://foo', 'att') # => foo:att='1'
|
|
+ # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil
|
|
#
|
|
- # Method contributed by Henrik Martensson
|
|
def get_attribute_ns(namespace, name)
|
|
result = nil
|
|
each_attribute() { |attribute|
|
|
diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb
|
|
index 97c7b6b..573db69 100644
|
|
--- a/lib/rexml/entity.rb
|
|
+++ b/lib/rexml/entity.rb
|
|
@@ -1,7 +1,7 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/child'
|
|
-require 'rexml/source'
|
|
-require 'rexml/xmltokens'
|
|
+require_relative 'child'
|
|
+require_relative 'source'
|
|
+require_relative 'xmltokens'
|
|
|
|
module REXML
|
|
class Entity < Child
|
|
@@ -90,7 +90,7 @@ module REXML
|
|
# object itself is valid.)
|
|
#
|
|
# out::
|
|
- # An object implementing <TT><<<TT> to which the entity will be
|
|
+ # An object implementing <TT><<</TT> to which the entity will be
|
|
# output
|
|
# indent::
|
|
# *DEPRECATED* and ignored
|
|
@@ -132,24 +132,34 @@ module REXML
|
|
# then:
|
|
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
|
def value
|
|
- if @value
|
|
- matches = @value.scan(PEREFERENCE_RE)
|
|
- rv = @value.clone
|
|
- if @parent
|
|
- sum = 0
|
|
- matches.each do |entity_reference|
|
|
- entity_value = @parent.entity( entity_reference[0] )
|
|
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
|
- raise "entity expansion has grown too large"
|
|
- else
|
|
- sum += entity_value.bytesize
|
|
- end
|
|
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
|
+ @resolved_value ||= resolve_value
|
|
+ end
|
|
+
|
|
+ def parent=(other)
|
|
+ @resolved_value = nil
|
|
+ super
|
|
+ end
|
|
+
|
|
+ private
|
|
+ def resolve_value
|
|
+ return nil if @value.nil?
|
|
+ return @value unless @value.match?(PEREFERENCE_RE)
|
|
+
|
|
+ matches = @value.scan(PEREFERENCE_RE)
|
|
+ rv = @value.clone
|
|
+ if @parent
|
|
+ sum = 0
|
|
+ matches.each do |entity_reference|
|
|
+ entity_value = @parent.entity( entity_reference[0] )
|
|
+ if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
|
+ raise "entity expansion has grown too large"
|
|
+ else
|
|
+ sum += entity_value.bytesize
|
|
end
|
|
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
|
end
|
|
- return rv
|
|
end
|
|
- nil
|
|
+ rv
|
|
end
|
|
end
|
|
|
|
diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb
|
|
index c375f14..811b2ff 100644
|
|
--- a/lib/rexml/formatters/default.rb
|
|
+++ b/lib/rexml/formatters/default.rb
|
|
@@ -1,4 +1,5 @@
|
|
# frozen_string_literal: false
|
|
+
|
|
module REXML
|
|
module Formatters
|
|
class Default
|
|
@@ -101,11 +102,14 @@ module REXML
|
|
end
|
|
|
|
def write_instruction( node, output )
|
|
- output << Instruction::START.sub(/\\/u, '')
|
|
+ output << Instruction::START
|
|
output << node.target
|
|
- output << ' '
|
|
- output << node.content
|
|
- output << Instruction::STOP.sub(/\\/u, '')
|
|
+ content = node.content
|
|
+ if content
|
|
+ output << ' '
|
|
+ output << content
|
|
+ end
|
|
+ output << Instruction::STOP
|
|
end
|
|
end
|
|
end
|
|
diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb
|
|
index a80274b..a1198b7 100644
|
|
--- a/lib/rexml/formatters/pretty.rb
|
|
+++ b/lib/rexml/formatters/pretty.rb
|
|
@@ -1,5 +1,5 @@
|
|
-# frozen_string_literal: false
|
|
-require 'rexml/formatters/default'
|
|
+# frozen_string_literal: true
|
|
+require_relative 'default'
|
|
|
|
module REXML
|
|
module Formatters
|
|
@@ -58,7 +58,7 @@ module REXML
|
|
skip = false
|
|
if compact
|
|
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
|
|
- string = ""
|
|
+ string = +""
|
|
old_level = @level
|
|
@level = 0
|
|
node.children.each { |child| write( child, string ) }
|
|
diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb
|
|
index 81e67f3..5ff51e1 100644
|
|
--- a/lib/rexml/formatters/transitive.rb
|
|
+++ b/lib/rexml/formatters/transitive.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/formatters/pretty'
|
|
+require_relative 'pretty'
|
|
|
|
module REXML
|
|
module Formatters
|
|
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
|
|
index cd879fd..4c11461 100644
|
|
--- a/lib/rexml/functions.rb
|
|
+++ b/lib/rexml/functions.rb
|
|
@@ -66,11 +66,11 @@ module REXML
|
|
def Functions::id( object )
|
|
end
|
|
|
|
- # UNTESTED
|
|
- def Functions::local_name( node_set=nil )
|
|
- get_namespace( node_set ) do |node|
|
|
+ def Functions::local_name(node_set=nil)
|
|
+ get_namespace(node_set) do |node|
|
|
return node.local_name
|
|
end
|
|
+ ""
|
|
end
|
|
|
|
def Functions::namespace_uri( node_set=nil )
|
|
@@ -86,10 +86,14 @@ module REXML
|
|
# Helper method.
|
|
def Functions::get_namespace( node_set = nil )
|
|
if node_set == nil
|
|
- yield @@context[:node] if defined? @@context[:node].namespace
|
|
+ yield @@context[:node] if @@context[:node].respond_to?(:namespace)
|
|
else
|
|
if node_set.respond_to? :each
|
|
- node_set.each { |node| yield node if defined? node.namespace }
|
|
+ result = []
|
|
+ node_set.each do |node|
|
|
+ result << yield(node) if node.respond_to?(:namespace)
|
|
+ end
|
|
+ result
|
|
elsif node_set.respond_to? :namespace
|
|
yield node_set
|
|
end
|
|
@@ -131,22 +135,38 @@ module REXML
|
|
#
|
|
# An object of a type other than the four basic types is converted to a
|
|
# string in a way that is dependent on that type.
|
|
- def Functions::string( object=nil )
|
|
- #object = @context unless object
|
|
- if object.instance_of? Array
|
|
- string( object[0] )
|
|
- elsif defined? object.node_type
|
|
- if object.node_type == :attribute
|
|
+ def Functions::string( object=@@context[:node] )
|
|
+ if object.respond_to?(:node_type)
|
|
+ case object.node_type
|
|
+ when :attribute
|
|
object.value
|
|
- elsif object.node_type == :element || object.node_type == :document
|
|
+ when :element
|
|
string_value(object)
|
|
+ when :document
|
|
+ string_value(object.root)
|
|
+ when :processing_instruction
|
|
+ object.content
|
|
else
|
|
object.to_s
|
|
end
|
|
- elsif object.nil?
|
|
- return ""
|
|
else
|
|
- object.to_s
|
|
+ case object
|
|
+ when Array
|
|
+ string(object[0])
|
|
+ when Float
|
|
+ if object.nan?
|
|
+ "NaN"
|
|
+ else
|
|
+ integer = object.to_i
|
|
+ if object == integer
|
|
+ "%d" % integer
|
|
+ else
|
|
+ object.to_s
|
|
+ end
|
|
+ end
|
|
+ else
|
|
+ object.to_s
|
|
+ end
|
|
end
|
|
end
|
|
|
|
@@ -167,9 +187,12 @@ module REXML
|
|
rv
|
|
end
|
|
|
|
- # UNTESTED
|
|
def Functions::concat( *objects )
|
|
- objects.join
|
|
+ concatenated = ""
|
|
+ objects.each do |object|
|
|
+ concatenated << string(object)
|
|
+ end
|
|
+ concatenated
|
|
end
|
|
|
|
# Fixed by Mike Stok
|
|
@@ -239,11 +262,10 @@ module REXML
|
|
string(string).length
|
|
end
|
|
|
|
- # UNTESTED
|
|
def Functions::normalize_space( string=nil )
|
|
string = string(@@context[:node]) if string.nil?
|
|
if string.kind_of? Array
|
|
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
|
|
+ string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
|
else
|
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
|
end
|
|
@@ -292,18 +314,23 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # UNTESTED
|
|
- def Functions::boolean( object=nil )
|
|
- if object.kind_of? String
|
|
- if object =~ /\d+/u
|
|
- return object.to_f != 0
|
|
- else
|
|
- return object.size > 0
|
|
- end
|
|
- elsif object.kind_of? Array
|
|
- object = object.find{|x| x and true}
|
|
+ def Functions::boolean(object=@@context[:node])
|
|
+ case object
|
|
+ when true, false
|
|
+ object
|
|
+ when Float
|
|
+ return false if object.zero?
|
|
+ return false if object.nan?
|
|
+ true
|
|
+ when Numeric
|
|
+ not object.zero?
|
|
+ when String
|
|
+ not object.empty?
|
|
+ when Array
|
|
+ not object.empty?
|
|
+ else
|
|
+ object ? true : false
|
|
end
|
|
- return object ? true : false
|
|
end
|
|
|
|
# UNTESTED
|
|
@@ -357,25 +384,23 @@ module REXML
|
|
#
|
|
# an object of a type other than the four basic types is converted to a
|
|
# number in a way that is dependent on that type
|
|
- def Functions::number( object=nil )
|
|
- object = @@context[:node] unless object
|
|
+ def Functions::number(object=@@context[:node])
|
|
case object
|
|
when true
|
|
Float(1)
|
|
when false
|
|
Float(0)
|
|
when Array
|
|
- number(string( object ))
|
|
+ number(string(object))
|
|
when Numeric
|
|
object.to_f
|
|
else
|
|
- str = string( object )
|
|
- # If XPath ever gets scientific notation...
|
|
- #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
|
|
- if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
|
|
- str.to_f
|
|
+ str = string(object)
|
|
+ case str.strip
|
|
+ when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/
|
|
+ $1.to_f
|
|
else
|
|
- (0.0 / 0.0)
|
|
+ Float::NAN
|
|
end
|
|
end
|
|
end
|
|
@@ -397,7 +422,7 @@ module REXML
|
|
number = number(number)
|
|
begin
|
|
neg = number.negative?
|
|
- number = number.abs.round(half: :up)
|
|
+ number = number.abs.round
|
|
neg ? -number : number
|
|
rescue FloatDomainError
|
|
number
|
|
diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb
|
|
index c4f65ee..318741f 100644
|
|
--- a/lib/rexml/instruction.rb
|
|
+++ b/lib/rexml/instruction.rb
|
|
@@ -1,13 +1,14 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
-require "rexml/source"
|
|
+
|
|
+require_relative "child"
|
|
+require_relative "source"
|
|
|
|
module REXML
|
|
# Represents an XML Instruction; IE, <? ... ?>
|
|
# TODO: Add parent arg (3rd arg) to constructor
|
|
class Instruction < Child
|
|
- START = '<\?'
|
|
- STOP = '\?>'
|
|
+ START = "<?"
|
|
+ STOP = "?>"
|
|
|
|
# target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
|
|
# content is everything else.
|
|
@@ -17,20 +18,25 @@ module REXML
|
|
# @param target can be one of a number of things. If String, then
|
|
# the target of this instruction is set to this. If an Instruction,
|
|
# then the Instruction is shallowly cloned (target and content are
|
|
- # copied). If a Source, then the source is scanned and parsed for
|
|
- # an Instruction declaration.
|
|
+ # copied).
|
|
# @param content Must be either a String, or a Parent. Can only
|
|
# be a Parent if the target argument is a Source. Otherwise, this
|
|
# String is set as the content of this instruction.
|
|
def initialize(target, content=nil)
|
|
- if target.kind_of? String
|
|
+ case target
|
|
+ when String
|
|
super()
|
|
@target = target
|
|
@content = content
|
|
- elsif target.kind_of? Instruction
|
|
+ when Instruction
|
|
super(content)
|
|
@target = target.target
|
|
@content = target.content
|
|
+ else
|
|
+ message =
|
|
+ "processing instruction target must be String or REXML::Instruction: "
|
|
+ message << "<#{target.inspect}>"
|
|
+ raise ArgumentError, message
|
|
end
|
|
@content.strip! if @content
|
|
end
|
|
@@ -45,11 +51,13 @@ module REXML
|
|
def write writer, indent=-1, transitive=false, ie_hack=false
|
|
Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
|
|
indent(writer, indent)
|
|
- writer << START.sub(/\\/u, '')
|
|
+ writer << START
|
|
writer << @target
|
|
- writer << ' '
|
|
- writer << @content
|
|
- writer << STOP.sub(/\\/u, '')
|
|
+ if @content
|
|
+ writer << ' '
|
|
+ writer << @content
|
|
+ end
|
|
+ writer << STOP
|
|
end
|
|
|
|
# @return true if other is an Instruction, and the content and target
|
|
diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb
|
|
index d58119a..3dab885 100644
|
|
--- a/lib/rexml/light/node.rb
|
|
+++ b/lib/rexml/light/node.rb
|
|
@@ -1,14 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/xmltokens'
|
|
-
|
|
-# [ :element, parent, name, attributes, children* ]
|
|
- # a = Node.new
|
|
- # a << "B" # => <a>B</a>
|
|
- # a.b # => <a>B<b/></a>
|
|
- # a.b[1] # => <a>B<b/><b/><a>
|
|
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
|
|
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
|
|
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
|
|
+require_relative '../xmltokens'
|
|
+
|
|
module REXML
|
|
module Light
|
|
# Represents a tagged XML element. Elements are characterized by
|
|
diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb
|
|
index 90ba7cc..2e67252 100644
|
|
--- a/lib/rexml/namespace.rb
|
|
+++ b/lib/rexml/namespace.rb
|
|
@@ -1,5 +1,6 @@
|
|
-# frozen_string_literal: false
|
|
-require 'rexml/xmltokens'
|
|
+# frozen_string_literal: true
|
|
+
|
|
+require_relative 'xmltokens'
|
|
|
|
module REXML
|
|
# Adds named attributes to an object.
|
|
@@ -9,19 +10,33 @@ module REXML
|
|
# The expanded name of the object, valid if name is set
|
|
attr_accessor :prefix
|
|
include XMLTokens
|
|
+ NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
|
|
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
|
|
|
# Sets the name and the expanded name
|
|
def name=( name )
|
|
@expanded_name = name
|
|
- name =~ NAMESPLIT
|
|
- if $1
|
|
- @prefix = $1
|
|
- else
|
|
+ if name.match?(NAME_WITHOUT_NAMESPACE)
|
|
@prefix = ""
|
|
@namespace = ""
|
|
+ @name = name
|
|
+ elsif name =~ NAMESPLIT
|
|
+ if $1
|
|
+ @prefix = $1
|
|
+ else
|
|
+ @prefix = ""
|
|
+ @namespace = ""
|
|
+ end
|
|
+ @name = $2
|
|
+ elsif name == ""
|
|
+ @prefix = nil
|
|
+ @namespace = nil
|
|
+ @name = nil
|
|
+ else
|
|
+ message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: "
|
|
+ message += "<#{name.inspect}>"
|
|
+ raise ArgumentError, message
|
|
end
|
|
- @name = $2
|
|
end
|
|
|
|
# Compares names optionally WITH namespaces
|
|
diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb
|
|
index 52337ad..c771db7 100644
|
|
--- a/lib/rexml/node.rb
|
|
+++ b/lib/rexml/node.rb
|
|
@@ -1,7 +1,7 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/parseexception"
|
|
-require "rexml/formatters/pretty"
|
|
-require "rexml/formatters/default"
|
|
+require_relative "parseexception"
|
|
+require_relative "formatters/pretty"
|
|
+require_relative "formatters/default"
|
|
|
|
module REXML
|
|
# Represents a node in the tree. Nodes are never encountered except as
|
|
@@ -52,10 +52,14 @@ module REXML
|
|
|
|
# Visit all subnodes of +self+ recursively
|
|
def each_recursive(&block) # :yields: node
|
|
- self.elements.each {|node|
|
|
- block.call(node)
|
|
- node.each_recursive(&block)
|
|
- }
|
|
+ stack = []
|
|
+ each { |child| stack.unshift child if child.node_type == :element }
|
|
+ until stack.empty?
|
|
+ child = stack.pop
|
|
+ yield child
|
|
+ n = stack.size
|
|
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
|
+ end
|
|
end
|
|
|
|
# Find (and return) first subnode (recursively) for which the block
|
|
diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb
|
|
index 96dfea5..88a5fb3 100644
|
|
--- a/lib/rexml/output.rb
|
|
+++ b/lib/rexml/output.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/encoding'
|
|
+require_relative 'encoding'
|
|
|
|
module REXML
|
|
class Output
|
|
diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb
|
|
index 3bd0a96..6a53b37 100644
|
|
--- a/lib/rexml/parent.rb
|
|
+++ b/lib/rexml/parent.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/child"
|
|
+require_relative "child"
|
|
|
|
module REXML
|
|
# A parent has children, and has methods for accessing them. The Parent
|
|
diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
|
|
index 7b16cd1..e57d05f 100644
|
|
--- a/lib/rexml/parseexception.rb
|
|
+++ b/lib/rexml/parseexception.rb
|
|
@@ -29,6 +29,7 @@ module REXML
|
|
err << "\nLine: #{line}\n"
|
|
err << "Position: #{position}\n"
|
|
err << "Last 80 unconsumed characters:\n"
|
|
+ err.force_encoding("ASCII-8BIT")
|
|
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
|
|
end
|
|
|
|
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
|
|
index e7ef695..275372e 100644
|
|
--- a/lib/rexml/parsers/baseparser.rb
|
|
+++ b/lib/rexml/parsers/baseparser.rb
|
|
@@ -1,14 +1,23 @@
|
|
-# frozen_string_literal: false
|
|
-
|
|
-require "strscan"
|
|
-
|
|
-require 'rexml/parseexception'
|
|
-require 'rexml/undefinednamespaceexception'
|
|
-require 'rexml/source'
|
|
+# frozen_string_literal: true
|
|
+require_relative '../parseexception'
|
|
+require_relative '../undefinednamespaceexception'
|
|
+require_relative '../source'
|
|
require 'set'
|
|
+require "strscan"
|
|
|
|
module REXML
|
|
module Parsers
|
|
+ if StringScanner::Version < "3.0.8"
|
|
+ module StringScannerCaptures
|
|
+ refine StringScanner do
|
|
+ def captures
|
|
+ values_at(*(1...size))
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ using StringScannerCaptures
|
|
+ end
|
|
+
|
|
# = Using the Pull Parser
|
|
# <em>This API is experimental, and subject to change.</em>
|
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
@@ -61,7 +70,7 @@ module REXML
|
|
XMLDECL_START = /\A<\?xml\s/u;
|
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
|
INSTRUCTION_START = /\A<\?/u
|
|
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
|
|
+ INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
|
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
|
|
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
|
|
|
|
@@ -98,7 +107,7 @@ module REXML
|
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
|
+ ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
|
|
|
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
|
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
|
@@ -114,16 +123,29 @@ module REXML
|
|
"apos" => [/'/, "'", "'", /'/]
|
|
}
|
|
|
|
-
|
|
- ######################################################################
|
|
- # These are patterns to identify common markup errors, to make the
|
|
- # error messages more informative.
|
|
- ######################################################################
|
|
- MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um
|
|
+ module Private
|
|
+ INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
|
+ TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
+ CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
+ ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
+ NAME_PATTERN = /\s*#{NAME}/um
|
|
+ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
+ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
+ ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
+ CHARACTER_REFERENCES = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
+ DEFAULT_ENTITIES_PATTERNS = {}
|
|
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
|
+ default_entities.each do |term|
|
|
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
|
+ end
|
|
+ end
|
|
+ private_constant :Private
|
|
|
|
def initialize( source )
|
|
self.stream = source
|
|
@listeners = []
|
|
+ @prefixes = Set.new
|
|
end
|
|
|
|
def add_listener( listener )
|
|
@@ -189,6 +211,8 @@ module REXML
|
|
|
|
# Returns the next event. This is a +PullEvent+ object.
|
|
def pull
|
|
+ @source.drop_parsed_content
|
|
+
|
|
pull_event.tap do |event|
|
|
@listeners.each do |listener|
|
|
listener.receive event
|
|
@@ -201,248 +225,257 @@ module REXML
|
|
x, @closed = @closed, nil
|
|
return [ :end_element, x ]
|
|
end
|
|
- return [ :end_document ] if empty?
|
|
+ if empty?
|
|
+ if @document_status == :in_doctype
|
|
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
|
+ end
|
|
+ return [ :end_document ]
|
|
+ end
|
|
return @stack.shift if @stack.size > 0
|
|
#STDERR.puts @source.encoding
|
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
|
+
|
|
+ @source.ensure_buffer
|
|
if @document_status == nil
|
|
- word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
|
|
- word = word[1] unless word.nil?
|
|
- #STDERR.puts "WORD = #{word.inspect}"
|
|
- case word
|
|
- when COMMENT_START
|
|
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
|
|
- when XMLDECL_START
|
|
- #STDERR.puts "XMLDECL"
|
|
- results = @source.match( XMLDECL_PATTERN, true )[1]
|
|
- version = VERSION.match( results )
|
|
- version = version[1] unless version.nil?
|
|
- encoding = ENCODING.match(results)
|
|
- encoding = encoding[1] unless encoding.nil?
|
|
- if need_source_encoding_update?(encoding)
|
|
- @source.encoding = encoding
|
|
- end
|
|
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
|
- encoding = "UTF-16"
|
|
- end
|
|
- standalone = STANDALONE.match(results)
|
|
- standalone = standalone[1] unless standalone.nil?
|
|
- return [ :xmldecl, version, encoding, standalone ]
|
|
- when INSTRUCTION_START
|
|
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
|
|
- when DOCTYPE_START
|
|
- base_error_message = "Malformed DOCTYPE"
|
|
- @source.match(DOCTYPE_START, true)
|
|
- @nsstack.unshift(curr_ns=Set.new)
|
|
- name = parse_name(base_error_message)
|
|
- if @source.match(/\A\s*\[/um, true)
|
|
- id = [nil, nil, nil]
|
|
- @document_status = :in_doctype
|
|
- elsif @source.match(/\A\s*>/um, true)
|
|
- id = [nil, nil, nil]
|
|
- @document_status = :after_doctype
|
|
- else
|
|
- id = parse_id(base_error_message,
|
|
- accept_external_id: true,
|
|
- accept_public_id: false)
|
|
- if id[0] == "SYSTEM"
|
|
- # For backward compatibility
|
|
- id[1], id[2] = id[2], nil
|
|
+ start_position = @source.position
|
|
+ if @source.match("<?", true)
|
|
+ return process_instruction(start_position)
|
|
+ elsif @source.match("<!", true)
|
|
+ if @source.match("--", true)
|
|
+ md = @source.match(/(.*?)-->/um, true)
|
|
+ if md.nil?
|
|
+ raise REXML::ParseException.new("Unclosed comment", @source)
|
|
+ end
|
|
+ if /--|-\z/.match?(md[1])
|
|
+ raise REXML::ParseException.new("Malformed comment", @source)
|
|
+ end
|
|
+ return [ :comment, md[1] ]
|
|
+ elsif @source.match("DOCTYPE", true)
|
|
+ base_error_message = "Malformed DOCTYPE"
|
|
+ unless @source.match(/\s+/um, true)
|
|
+ if @source.match(">")
|
|
+ message = "#{base_error_message}: name is missing"
|
|
+ else
|
|
+ message = "#{base_error_message}: invalid name"
|
|
+ end
|
|
+ @source.position = start_position
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
end
|
|
- if @source.match(/\A\s*\[/um, true)
|
|
- @document_status = :in_doctype
|
|
- elsif @source.match(/\A\s*>/um, true)
|
|
+ @nsstack.unshift(Set.new)
|
|
+ name = parse_name(base_error_message)
|
|
+ if @source.match(/\s*\[/um, true)
|
|
+ id = [nil, nil, nil]
|
|
+ @document_status = :in_doctype
|
|
+ elsif @source.match(/\s*>/um, true)
|
|
+ id = [nil, nil, nil]
|
|
@document_status = :after_doctype
|
|
+ @source.ensure_buffer
|
|
else
|
|
- message = "#{base_error_message}: garbage after external ID"
|
|
- raise REXML::ParseException.new(message, @source)
|
|
+ id = parse_id(base_error_message,
|
|
+ accept_external_id: true,
|
|
+ accept_public_id: false)
|
|
+ if id[0] == "SYSTEM"
|
|
+ # For backward compatibility
|
|
+ id[1], id[2] = id[2], nil
|
|
+ end
|
|
+ if @source.match(/\s*\[/um, true)
|
|
+ @document_status = :in_doctype
|
|
+ elsif @source.match(/\s*>/um, true)
|
|
+ @document_status = :after_doctype
|
|
+ @source.ensure_buffer
|
|
+ else
|
|
+ message = "#{base_error_message}: garbage after external ID"
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
+ end
|
|
end
|
|
- end
|
|
- args = [:start_doctype, name, *id]
|
|
- if @document_status == :after_doctype
|
|
- @source.match(/\A\s*/um, true)
|
|
- @stack << [ :end_doctype ]
|
|
- end
|
|
- return args
|
|
- when /\A\s+/
|
|
- else
|
|
- @document_status = :after_doctype
|
|
- if @source.encoding == "UTF-8"
|
|
- @source.buffer.force_encoding(::Encoding::UTF_8)
|
|
+ args = [:start_doctype, name, *id]
|
|
+ if @document_status == :after_doctype
|
|
+ @source.match(/\s*/um, true)
|
|
+ @stack << [ :end_doctype ]
|
|
+ end
|
|
+ return args
|
|
+ else
|
|
+ message = "Invalid XML"
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
end
|
|
end
|
|
end
|
|
if @document_status == :in_doctype
|
|
- md = @source.match(/\A\s*(.*?>)/um)
|
|
- case md[1]
|
|
- when SYSTEMENTITY
|
|
- match = @source.match( SYSTEMENTITY, true )[1]
|
|
- return [ :externalentity, match ]
|
|
-
|
|
- when ELEMENTDECL_START
|
|
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
|
|
-
|
|
- when ENTITY_START
|
|
- match = @source.match( ENTITYDECL, true ).to_a.compact
|
|
- match[0] = :entitydecl
|
|
- ref = false
|
|
- if match[1] == '%'
|
|
- ref = true
|
|
- match.delete_at 1
|
|
- end
|
|
- # Now we have to sort out what kind of entity reference this is
|
|
- if match[2] == 'SYSTEM'
|
|
- # External reference
|
|
- match[3] = match[3][1..-2] # PUBID
|
|
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
- elsif match[2] == 'PUBLIC'
|
|
- # External reference
|
|
- match[3] = match[3][1..-2] # PUBID
|
|
- match[4] = match[4][1..-2] # HREF
|
|
- match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
|
- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
|
- else
|
|
- match[2] = match[2][1..-2]
|
|
- match.pop if match.size == 4
|
|
- # match is [ :entity, name, value ]
|
|
- end
|
|
- match << '%' if ref
|
|
- return match
|
|
- when ATTLISTDECL_START
|
|
- md = @source.match( ATTLISTDECL_PATTERN, true )
|
|
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
- element = md[1]
|
|
- contents = md[0]
|
|
-
|
|
- pairs = {}
|
|
- values = md[0].scan( ATTDEF_RE )
|
|
- values.each do |attdef|
|
|
- unless attdef[3] == "#IMPLIED"
|
|
- attdef.compact!
|
|
- val = attdef[3]
|
|
- val = attdef[4] if val == "#FIXED "
|
|
- pairs[attdef[0]] = val
|
|
- if attdef[0] =~ /^xmlns:(.*)/
|
|
- @nsstack[0] << $1
|
|
- end
|
|
+ @source.match(/\s*/um, true) # skip spaces
|
|
+ start_position = @source.position
|
|
+ if @source.match("<!", true)
|
|
+ if @source.match("ELEMENT", true)
|
|
+ md = @source.match(/(.*?)>/um, true)
|
|
+ raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
+ return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
+ elsif @source.match("ENTITY", true)
|
|
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
|
+ ref = false
|
|
+ if match[1] == '%'
|
|
+ ref = true
|
|
+ match.delete_at 1
|
|
end
|
|
- end
|
|
- return [ :attlistdecl, element, pairs, contents ]
|
|
- when NOTATIONDECL_START
|
|
- base_error_message = "Malformed notation declaration"
|
|
- unless @source.match(/\A\s*<!NOTATION\s+/um, true)
|
|
- if @source.match(/\A\s*<!NOTATION\s*>/um)
|
|
- message = "#{base_error_message}: name is missing"
|
|
+ # Now we have to sort out what kind of entity reference this is
|
|
+ if match[2] == 'SYSTEM'
|
|
+ # External reference
|
|
+ match[3] = match[3][1..-2] # PUBID
|
|
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
+ elsif match[2] == 'PUBLIC'
|
|
+ # External reference
|
|
+ match[3] = match[3][1..-2] # PUBID
|
|
+ match[4] = match[4][1..-2] # HREF
|
|
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
|
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
|
else
|
|
- message = "#{base_error_message}: invalid declaration name"
|
|
+ match[2] = match[2][1..-2]
|
|
+ match.pop if match.size == 4
|
|
+ # match is [ :entity, name, value ]
|
|
end
|
|
- raise REXML::ParseException.new(message, @source)
|
|
- end
|
|
- name = parse_name(base_error_message)
|
|
- id = parse_id(base_error_message,
|
|
- accept_external_id: true,
|
|
- accept_public_id: true)
|
|
- unless @source.match(/\A\s*>/um, true)
|
|
- message = "#{base_error_message}: garbage before end >"
|
|
- raise REXML::ParseException.new(message, @source)
|
|
+ match << '%' if ref
|
|
+ return match
|
|
+ elsif @source.match("ATTLIST", true)
|
|
+ md = @source.match(Private::ATTLISTDECL_END, true)
|
|
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
+ element = md[1]
|
|
+ contents = md[0]
|
|
+
|
|
+ pairs = {}
|
|
+ values = md[0].scan( ATTDEF_RE )
|
|
+ values.each do |attdef|
|
|
+ unless attdef[3] == "#IMPLIED"
|
|
+ attdef.compact!
|
|
+ val = attdef[3]
|
|
+ val = attdef[4] if val == "#FIXED "
|
|
+ pairs[attdef[0]] = val
|
|
+ if attdef[0] =~ /^xmlns:(.*)/
|
|
+ @nsstack[0] << $1
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ return [ :attlistdecl, element, pairs, contents ]
|
|
+ elsif @source.match("NOTATION", true)
|
|
+ base_error_message = "Malformed notation declaration"
|
|
+ unless @source.match(/\s+/um, true)
|
|
+ if @source.match(">")
|
|
+ message = "#{base_error_message}: name is missing"
|
|
+ else
|
|
+ message = "#{base_error_message}: invalid name"
|
|
+ end
|
|
+ @source.position = start_position
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
+ end
|
|
+ name = parse_name(base_error_message)
|
|
+ id = parse_id(base_error_message,
|
|
+ accept_external_id: true,
|
|
+ accept_public_id: true)
|
|
+ unless @source.match(/\s*>/um, true)
|
|
+ message = "#{base_error_message}: garbage before end >"
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
+ end
|
|
+ return [:notationdecl, name, *id]
|
|
+ elsif md = @source.match(/--(.*?)-->/um, true)
|
|
+ case md[1]
|
|
+ when /--/, /-\z/
|
|
+ raise REXML::ParseException.new("Malformed comment", @source)
|
|
+ end
|
|
+ return [ :comment, md[1] ] if md
|
|
end
|
|
- return [:notationdecl, name, *id]
|
|
- when DOCTYPE_END
|
|
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
+ return [ :externalentity, match[1] ]
|
|
+ elsif @source.match(/\]\s*>/um, true)
|
|
@document_status = :after_doctype
|
|
- @source.match( DOCTYPE_END, true )
|
|
return [ :end_doctype ]
|
|
end
|
|
+ if @document_status == :in_doctype
|
|
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
+ end
|
|
end
|
|
if @document_status == :after_doctype
|
|
- @source.match(/\A\s*/um, true)
|
|
+ @source.match(/\s*/um, true)
|
|
end
|
|
begin
|
|
- @source.read if @source.buffer.size<2
|
|
- if @source.buffer[0] == ?<
|
|
- if @source.buffer[1] == ?/
|
|
+ start_position = @source.position
|
|
+ if @source.match("<", true)
|
|
+ # :text's read_until may remain only "<" in buffer. In the
|
|
+ # case, buffer is empty here. So we need to fill buffer
|
|
+ # here explicitly.
|
|
+ @source.ensure_buffer
|
|
+ if @source.match("/", true)
|
|
@nsstack.shift
|
|
last_tag = @tags.pop
|
|
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
|
|
- md = @source.match( CLOSE_MATCH, true )
|
|
+ md = @source.match(Private::CLOSE_PATTERN, true)
|
|
if md and !last_tag
|
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
if md.nil? or last_tag != md[1]
|
|
message = "Missing end tag for '#{last_tag}'"
|
|
- message << " (got '#{md[1]}')" if md
|
|
+ message += " (got '#{md[1]}')" if md
|
|
+ @source.position = start_position if md.nil?
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
return [ :end_element, last_tag ]
|
|
- elsif @source.buffer[1] == ?!
|
|
- md = @source.match(/\A(\s*[^>]*>)/um)
|
|
+ elsif @source.match("!", true)
|
|
+ md = @source.match(/([^>]*>)/um)
|
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
|
- if md[0][2] == ?-
|
|
- md = @source.match( COMMENT_PATTERN, true )
|
|
+ if md[0][0] == ?-
|
|
+ md = @source.match(/--(.*?)-->/um, true)
|
|
|
|
- case md[1]
|
|
- when /--/, /-\z/
|
|
+ if md.nil? || /--|-\z/.match?(md[1])
|
|
raise REXML::ParseException.new("Malformed comment", @source)
|
|
end
|
|
|
|
- return [ :comment, md[1] ] if md
|
|
+ return [ :comment, md[1] ]
|
|
else
|
|
- md = @source.match( CDATA_PATTERN, true )
|
|
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
|
return [ :cdata, md[1] ] if md
|
|
end
|
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
|
"in the doctype declaration.", @source)
|
|
- elsif @source.buffer[1] == ??
|
|
- md = @source.match( INSTRUCTION_PATTERN, true )
|
|
- return [ :processing_instruction, md[1], md[2] ] if md
|
|
- raise REXML::ParseException.new( "Bad instruction declaration",
|
|
- @source)
|
|
+ elsif @source.match("?", true)
|
|
+ return process_instruction(start_position)
|
|
else
|
|
# Get the next tag
|
|
- md = @source.match(TAG_MATCH, true)
|
|
+ md = @source.match(Private::TAG_PATTERN, true)
|
|
unless md
|
|
- # Check for missing attribute quotes
|
|
- raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
|
+ @source.position = start_position
|
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
|
end
|
|
+ tag = md[1]
|
|
@document_status = :in_element
|
|
- prefixes = Set.new
|
|
- prefixes << md[2] if md[2]
|
|
+ @prefixes.clear
|
|
+ @prefixes << md[2] if md[2]
|
|
@nsstack.unshift(curr_ns=Set.new)
|
|
- attributes, closed = parse_attributes(prefixes, curr_ns)
|
|
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
|
|
# Verify that all of the prefixes have been defined
|
|
- for prefix in prefixes
|
|
+ for prefix in @prefixes
|
|
unless @nsstack.find{|k| k.member?(prefix)}
|
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
|
end
|
|
end
|
|
|
|
if closed
|
|
- @closed = md[1]
|
|
+ @closed = tag
|
|
@nsstack.shift
|
|
else
|
|
- @tags.push( md[1] )
|
|
+ @tags.push( tag )
|
|
end
|
|
- return [ :start_element, md[1], attributes ]
|
|
+ return [ :start_element, tag, attributes ]
|
|
end
|
|
else
|
|
- md = @source.match( TEXT_PATTERN, true )
|
|
- if md[0].length == 0
|
|
- @source.match( /(\s+)/, true )
|
|
+ text = @source.read_until("<")
|
|
+ if text.chomp!("<")
|
|
+ @source.position -= "<".bytesize
|
|
end
|
|
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
|
|
- #return [ :text, "" ] if md[0].length == 0
|
|
- # unnormalized = Text::unnormalize( md[1], self )
|
|
- # return PullEvent.new( :text, md[1], unnormalized )
|
|
- return [ :text, md[1] ]
|
|
+ return [ :text, text ]
|
|
end
|
|
rescue REXML::UndefinedNamespaceException
|
|
raise
|
|
rescue REXML::ParseException
|
|
raise
|
|
- rescue Exception, NameError => error
|
|
+ rescue => error
|
|
raise REXML::ParseException.new( "Exception parsing",
|
|
@source, self, (error ? error : $!) )
|
|
end
|
|
@@ -478,11 +511,10 @@ module REXML
|
|
|
|
# Unescapes all possible entities
|
|
def unnormalize( string, entities=nil, filter=nil )
|
|
- rv = string.clone
|
|
- rv.gsub!( /\r\n?/, "\n" )
|
|
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
|
matches = rv.scan( REFERENCE_RE )
|
|
return rv if matches.size == 0
|
|
- rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
|
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
m=$1
|
|
m = "0#{m}" if m[0] == ?x
|
|
[Integer(m)].pack('U*')
|
|
@@ -493,7 +525,7 @@ module REXML
|
|
unless filter and filter.include?(entity_reference)
|
|
entity_value = entity( entity_reference, entities )
|
|
if entity_value
|
|
- re = /&#{entity_reference};/
|
|
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
|
rv.gsub!( re, entity_value )
|
|
else
|
|
er = DEFAULT_ENTITIES[entity_reference]
|
|
@@ -501,7 +533,7 @@ module REXML
|
|
end
|
|
end
|
|
end
|
|
- rv.gsub!( /&/, '&' )
|
|
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
|
end
|
|
rv
|
|
end
|
|
@@ -514,9 +546,9 @@ module REXML
|
|
end
|
|
|
|
def parse_name(base_error_message)
|
|
- md = @source.match(/\A\s*#{NAME}/um, true)
|
|
+ md = @source.match(Private::NAME_PATTERN, true)
|
|
unless md
|
|
- if @source.match(/\A\s*\S/um)
|
|
+ if @source.match(/\s*\S/um)
|
|
message = "#{base_error_message}: invalid name"
|
|
else
|
|
message = "#{base_error_message}: name is missing"
|
|
@@ -592,88 +624,91 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- def parse_attributes(prefixes, curr_ns)
|
|
- attributes = {}
|
|
- closed = false
|
|
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
|
|
- if match_data.nil?
|
|
- message = "Start tag isn't ended"
|
|
+ def process_instruction(start_position)
|
|
+ match_data = @source.match(Private::INSTRUCTION_END, true)
|
|
+ unless match_data
|
|
+ message = "Invalid processing instruction node"
|
|
+ @source.position = start_position
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
-
|
|
- raw_attributes = match_data[1]
|
|
- closed = !match_data[2].nil?
|
|
- return attributes, closed if raw_attributes.nil?
|
|
- return attributes, closed if raw_attributes.empty?
|
|
-
|
|
- scanner = StringScanner.new(raw_attributes)
|
|
- until scanner.eos?
|
|
- if scanner.scan(/\s+/)
|
|
- break if scanner.eos?
|
|
+ if @document_status.nil? and match_data[1] == "xml"
|
|
+ content = match_data[2]
|
|
+ version = VERSION.match(content)
|
|
+ version = version[1] unless version.nil?
|
|
+ encoding = ENCODING.match(content)
|
|
+ encoding = encoding[1] unless encoding.nil?
|
|
+ if need_source_encoding_update?(encoding)
|
|
+ @source.encoding = encoding
|
|
+ end
|
|
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
|
+ encoding = "UTF-16"
|
|
end
|
|
+ standalone = STANDALONE.match(content)
|
|
+ standalone = standalone[1] unless standalone.nil?
|
|
+ return [ :xmldecl, version, encoding, standalone ]
|
|
+ end
|
|
+ [:processing_instruction, match_data[1], match_data[2]]
|
|
+ end
|
|
|
|
- pos = scanner.pos
|
|
- loop do
|
|
- break if scanner.scan(ATTRIBUTE_PATTERN)
|
|
- unless scanner.scan(QNAME)
|
|
- message = "Invalid attribute name: <#{scanner.rest}>"
|
|
- raise REXML::ParseException.new(message, @source)
|
|
- end
|
|
- name = scanner[0]
|
|
- unless scanner.scan(/\s*=\s*/um)
|
|
+ def parse_attributes(prefixes, curr_ns)
|
|
+ attributes = {}
|
|
+ closed = false
|
|
+ while true
|
|
+ if @source.match(">", true)
|
|
+ return attributes, closed
|
|
+ elsif @source.match("/>", true)
|
|
+ closed = true
|
|
+ return attributes, closed
|
|
+ elsif match = @source.match(QNAME, true)
|
|
+ name = match[1]
|
|
+ prefix = match[2]
|
|
+ local_part = match[3]
|
|
+
|
|
+ unless @source.match(/\s*=\s*/um, true)
|
|
message = "Missing attribute equal: <#{name}>"
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
- quote = scanner.scan(/['"]/)
|
|
- unless quote
|
|
+ unless match = @source.match(/(['"])/, true)
|
|
message = "Missing attribute value start quote: <#{name}>"
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
- unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
|
|
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
|
|
- if match_data
|
|
- scanner << "/" if closed
|
|
- scanner << ">"
|
|
- scanner << match_data[1]
|
|
- scanner.pos = pos
|
|
- closed = !match_data[2].nil?
|
|
- next
|
|
- end
|
|
- message =
|
|
- "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
+ quote = match[1]
|
|
+ start_position = @source.position
|
|
+ value = @source.read_until(quote)
|
|
+ unless value.chomp!(quote)
|
|
+ @source.position = start_position
|
|
+ message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
- end
|
|
- name = scanner[1]
|
|
- prefix = scanner[2]
|
|
- local_part = scanner[3]
|
|
- # quote = scanner[4]
|
|
- value = scanner[5]
|
|
- if prefix == "xmlns"
|
|
- if local_part == "xml"
|
|
- if value != "http://www.w3.org/XML/1998/namespace"
|
|
- msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
+ @source.match(/\s*/um, true)
|
|
+ if prefix == "xmlns"
|
|
+ if local_part == "xml"
|
|
+ if value != "http://www.w3.org/XML/1998/namespace"
|
|
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
+ raise REXML::ParseException.new( msg, @source, self )
|
|
+ end
|
|
+ elsif local_part == "xmlns"
|
|
+ msg = "The 'xmlns' prefix must not be declared "+
|
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
- raise REXML::ParseException.new( msg, @source, self )
|
|
+ raise REXML::ParseException.new( msg, @source, self)
|
|
end
|
|
- elsif local_part == "xmlns"
|
|
- msg = "The 'xmlns' prefix must not be declared "+
|
|
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
- raise REXML::ParseException.new( msg, @source, self)
|
|
+ curr_ns << local_part
|
|
+ elsif prefix
|
|
+ prefixes << prefix unless prefix == "xml"
|
|
end
|
|
- curr_ns << local_part
|
|
- elsif prefix
|
|
- prefixes << prefix unless prefix == "xml"
|
|
- end
|
|
|
|
- if attributes.has_key?(name)
|
|
- msg = "Duplicate attribute #{name.inspect}"
|
|
- raise REXML::ParseException.new(msg, @source, self)
|
|
- end
|
|
+ if attributes[name]
|
|
+ msg = "Duplicate attribute #{name.inspect}"
|
|
+ raise REXML::ParseException.new(msg, @source, self)
|
|
+ end
|
|
|
|
- attributes[name] = value
|
|
+ attributes[name] = value
|
|
+ else
|
|
+ message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
|
+ raise REXML::ParseException.new(message, @source)
|
|
+ end
|
|
end
|
|
- return attributes, closed
|
|
end
|
|
end
|
|
end
|
|
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
|
|
index f0601ae..bdc0827 100644
|
|
--- a/lib/rexml/parsers/lightparser.rb
|
|
+++ b/lib/rexml/parsers/lightparser.rb
|
|
@@ -1,7 +1,7 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/parsers/streamparser'
|
|
-require 'rexml/parsers/baseparser'
|
|
-require 'rexml/light/node'
|
|
+require_relative 'streamparser'
|
|
+require_relative 'baseparser'
|
|
+require_relative '../light/node'
|
|
|
|
module REXML
|
|
module Parsers
|
|
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
|
|
index 8c49217..f8b232a 100644
|
|
--- a/lib/rexml/parsers/pullparser.rb
|
|
+++ b/lib/rexml/parsers/pullparser.rb
|
|
@@ -1,9 +1,9 @@
|
|
# frozen_string_literal: false
|
|
require 'forwardable'
|
|
|
|
-require 'rexml/parseexception'
|
|
-require 'rexml/parsers/baseparser'
|
|
-require 'rexml/xmltokens'
|
|
+require_relative '../parseexception'
|
|
+require_relative 'baseparser'
|
|
+require_relative '../xmltokens'
|
|
|
|
module REXML
|
|
module Parsers
|
|
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
|
|
index 1386f69..6a24ce2 100644
|
|
--- a/lib/rexml/parsers/sax2parser.rb
|
|
+++ b/lib/rexml/parsers/sax2parser.rb
|
|
@@ -1,8 +1,8 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/parsers/baseparser'
|
|
-require 'rexml/parseexception'
|
|
-require 'rexml/namespace'
|
|
-require 'rexml/text'
|
|
+require_relative 'baseparser'
|
|
+require_relative '../parseexception'
|
|
+require_relative '../namespace'
|
|
+require_relative '../text'
|
|
|
|
module REXML
|
|
module Parsers
|
|
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
|
|
index f6a8bfa..9e0eb0b 100644
|
|
--- a/lib/rexml/parsers/streamparser.rb
|
|
+++ b/lib/rexml/parsers/streamparser.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/parsers/baseparser"
|
|
+require_relative "baseparser"
|
|
|
|
module REXML
|
|
module Parsers
|
|
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
|
|
index fc0993c..0cb6f7c 100644
|
|
--- a/lib/rexml/parsers/treeparser.rb
|
|
+++ b/lib/rexml/parsers/treeparser.rb
|
|
@@ -1,6 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/validation/validationexception'
|
|
-require 'rexml/undefinednamespaceexception'
|
|
+require_relative '../validation/validationexception'
|
|
+require_relative '../undefinednamespaceexception'
|
|
|
|
module REXML
|
|
module Parsers
|
|
@@ -16,7 +16,6 @@ module REXML
|
|
|
|
def parse
|
|
tag_stack = []
|
|
- in_doctype = false
|
|
entities = nil
|
|
begin
|
|
while true
|
|
@@ -39,17 +38,15 @@ module REXML
|
|
tag_stack.pop
|
|
@build_context = @build_context.parent
|
|
when :text
|
|
- if not in_doctype
|
|
- if @build_context[-1].instance_of? Text
|
|
- @build_context[-1] << event[1]
|
|
- else
|
|
- @build_context.add(
|
|
- Text.new(event[1], @build_context.whitespace, nil, true)
|
|
- ) unless (
|
|
- @build_context.ignore_whitespace_nodes and
|
|
- event[1].strip.size==0
|
|
- )
|
|
- end
|
|
+ if @build_context[-1].instance_of? Text
|
|
+ @build_context[-1] << event[1]
|
|
+ else
|
|
+ @build_context.add(
|
|
+ Text.new(event[1], @build_context.whitespace, nil, true)
|
|
+ ) unless (
|
|
+ @build_context.ignore_whitespace_nodes and
|
|
+ event[1].strip.size==0
|
|
+ )
|
|
end
|
|
when :comment
|
|
c = Comment.new( event[1] )
|
|
@@ -60,14 +57,12 @@ module REXML
|
|
when :processing_instruction
|
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
|
when :end_doctype
|
|
- in_doctype = false
|
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
|
@build_context = @build_context.parent
|
|
when :start_doctype
|
|
doctype = DocType.new( event[1..-1], @build_context )
|
|
@build_context = doctype
|
|
entities = {}
|
|
- in_doctype = true
|
|
when :attlistdecl
|
|
n = AttlistDecl.new( event[1..-1] )
|
|
@build_context.add( n )
|
|
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
|
|
index 6571d11..e0029f4 100644
|
|
--- a/lib/rexml/parsers/ultralightparser.rb
|
|
+++ b/lib/rexml/parsers/ultralightparser.rb
|
|
@@ -1,6 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/parsers/streamparser'
|
|
-require 'rexml/parsers/baseparser'
|
|
+require_relative 'streamparser'
|
|
+require_relative 'baseparser'
|
|
|
|
module REXML
|
|
module Parsers
|
|
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
|
|
index 32b70bb..bd3b685 100644
|
|
--- a/lib/rexml/parsers/xpathparser.rb
|
|
+++ b/lib/rexml/parsers/xpathparser.rb
|
|
@@ -1,6 +1,7 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/namespace'
|
|
-require 'rexml/xmltokens'
|
|
+
|
|
+require_relative '../namespace'
|
|
+require_relative '../xmltokens'
|
|
|
|
module REXML
|
|
module Parsers
|
|
@@ -22,7 +23,13 @@ module REXML
|
|
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
|
|
path.gsub!( /\s+([\]\)])/, '\1')
|
|
parsed = []
|
|
- OrExpr(path, parsed)
|
|
+ rest = OrExpr(path, parsed)
|
|
+ if rest
|
|
+ unless rest.strip.empty?
|
|
+ raise ParseException.new("Garbage component exists at the end: " +
|
|
+ "<#{rest}>: <#{path}>")
|
|
+ end
|
|
+ end
|
|
parsed
|
|
end
|
|
|
|
@@ -32,108 +39,143 @@ module REXML
|
|
parsed
|
|
end
|
|
|
|
- def abbreviate( path )
|
|
- path = path.kind_of?(String) ? parse( path ) : path
|
|
- string = ""
|
|
- document = false
|
|
- while path.size > 0
|
|
- op = path.shift
|
|
+ def abbreviate(path_or_parsed)
|
|
+ if path_or_parsed.kind_of?(String)
|
|
+ parsed = parse(path_or_parsed)
|
|
+ else
|
|
+ parsed = path_or_parsed
|
|
+ end
|
|
+ components = []
|
|
+ component = nil
|
|
+ while parsed.size > 0
|
|
+ op = parsed.shift
|
|
case op
|
|
when :node
|
|
+ component << "node()"
|
|
when :attribute
|
|
- string << "/" if string.size > 0
|
|
- string << "@"
|
|
+ component = "@"
|
|
+ components << component
|
|
when :child
|
|
- string << "/" if string.size > 0
|
|
+ component = ""
|
|
+ components << component
|
|
when :descendant_or_self
|
|
- string << "/"
|
|
+ next_op = parsed[0]
|
|
+ if next_op == :node
|
|
+ parsed.shift
|
|
+ component = ""
|
|
+ components << component
|
|
+ else
|
|
+ component = "descendant-or-self::"
|
|
+ components << component
|
|
+ end
|
|
when :self
|
|
- string << "."
|
|
+ next_op = parsed[0]
|
|
+ if next_op == :node
|
|
+ parsed.shift
|
|
+ components << "."
|
|
+ else
|
|
+ component = "self::"
|
|
+ components << component
|
|
+ end
|
|
when :parent
|
|
- string << ".."
|
|
+ next_op = parsed[0]
|
|
+ if next_op == :node
|
|
+ parsed.shift
|
|
+ components << ".."
|
|
+ else
|
|
+ component = "parent::"
|
|
+ components << component
|
|
+ end
|
|
when :any
|
|
- string << "*"
|
|
+ component << "*"
|
|
when :text
|
|
- string << "text()"
|
|
+ component << "text()"
|
|
when :following, :following_sibling,
|
|
:ancestor, :ancestor_or_self, :descendant,
|
|
:namespace, :preceding, :preceding_sibling
|
|
- string << "/" unless string.size == 0
|
|
- string << op.to_s.tr("_", "-")
|
|
- string << "::"
|
|
+ component = op.to_s.tr("_", "-") << "::"
|
|
+ components << component
|
|
when :qname
|
|
- prefix = path.shift
|
|
- name = path.shift
|
|
- string << prefix+":" if prefix.size > 0
|
|
- string << name
|
|
+ prefix = parsed.shift
|
|
+ name = parsed.shift
|
|
+ component << prefix+":" if prefix.size > 0
|
|
+ component << name
|
|
when :predicate
|
|
- string << '['
|
|
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
|
|
- string << ']'
|
|
+ component << '['
|
|
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
|
|
+ component << ']'
|
|
when :document
|
|
- document = true
|
|
+ components << ""
|
|
when :function
|
|
- string << path.shift
|
|
- string << "( "
|
|
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
|
|
- string << " )"
|
|
+ component << parsed.shift
|
|
+ component << "( "
|
|
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
|
|
+ component << " )"
|
|
when :literal
|
|
- string << %Q{ "#{path.shift}" }
|
|
+ component << quote_literal(parsed.shift)
|
|
else
|
|
- string << "/" unless string.size == 0
|
|
- string << "UNKNOWN("
|
|
- string << op.inspect
|
|
- string << ")"
|
|
+ component << "UNKNOWN("
|
|
+ component << op.inspect
|
|
+ component << ")"
|
|
end
|
|
end
|
|
- string = "/"+string if document
|
|
- return string
|
|
+ case components
|
|
+ when [""]
|
|
+ "/"
|
|
+ when ["", ""]
|
|
+ "//"
|
|
+ else
|
|
+ components.join("/")
|
|
+ end
|
|
end
|
|
|
|
- def expand( path )
|
|
- path = path.kind_of?(String) ? parse( path ) : path
|
|
- string = ""
|
|
+ def expand(path_or_parsed)
|
|
+ if path_or_parsed.kind_of?(String)
|
|
+ parsed = parse(path_or_parsed)
|
|
+ else
|
|
+ parsed = path_or_parsed
|
|
+ end
|
|
+ path = ""
|
|
document = false
|
|
- while path.size > 0
|
|
- op = path.shift
|
|
+ while parsed.size > 0
|
|
+ op = parsed.shift
|
|
case op
|
|
when :node
|
|
- string << "node()"
|
|
+ path << "node()"
|
|
when :attribute, :child, :following, :following_sibling,
|
|
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
|
|
:namespace, :preceding, :preceding_sibling, :self, :parent
|
|
- string << "/" unless string.size == 0
|
|
- string << op.to_s.tr("_", "-")
|
|
- string << "::"
|
|
+ path << "/" unless path.size == 0
|
|
+ path << op.to_s.tr("_", "-")
|
|
+ path << "::"
|
|
when :any
|
|
- string << "*"
|
|
+ path << "*"
|
|
when :qname
|
|
- prefix = path.shift
|
|
- name = path.shift
|
|
- string << prefix+":" if prefix.size > 0
|
|
- string << name
|
|
+ prefix = parsed.shift
|
|
+ name = parsed.shift
|
|
+ path << prefix+":" if prefix.size > 0
|
|
+ path << name
|
|
when :predicate
|
|
- string << '['
|
|
- string << predicate_to_string( path.shift ) { |x| expand(x) }
|
|
- string << ']'
|
|
+ path << '['
|
|
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
|
|
+ path << ']'
|
|
when :document
|
|
document = true
|
|
else
|
|
- string << "/" unless string.size == 0
|
|
- string << "UNKNOWN("
|
|
- string << op.inspect
|
|
- string << ")"
|
|
+ path << "UNKNOWN("
|
|
+ path << op.inspect
|
|
+ path << ")"
|
|
end
|
|
end
|
|
- string = "/"+string if document
|
|
- return string
|
|
+ path = "/"+path if document
|
|
+ path
|
|
end
|
|
|
|
- def predicate_to_string( path, &block )
|
|
- string = ""
|
|
- case path[0]
|
|
+ def predicate_to_path(parsed, &block)
|
|
+ path = ""
|
|
+ case parsed[0]
|
|
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
|
|
- op = path.shift
|
|
+ op = parsed.shift
|
|
case op
|
|
when :eq
|
|
op = "="
|
|
@@ -150,42 +192,56 @@ module REXML
|
|
when :union
|
|
op = "|"
|
|
end
|
|
- left = predicate_to_string( path.shift, &block )
|
|
- right = predicate_to_string( path.shift, &block )
|
|
- string << " "
|
|
- string << left
|
|
- string << " "
|
|
- string << op.to_s
|
|
- string << " "
|
|
- string << right
|
|
- string << " "
|
|
+ left = predicate_to_path( parsed.shift, &block )
|
|
+ right = predicate_to_path( parsed.shift, &block )
|
|
+ path << left
|
|
+ path << " "
|
|
+ path << op.to_s
|
|
+ path << " "
|
|
+ path << right
|
|
when :function
|
|
- path.shift
|
|
- name = path.shift
|
|
- string << name
|
|
- string << "( "
|
|
- string << predicate_to_string( path.shift, &block )
|
|
- string << " )"
|
|
+ parsed.shift
|
|
+ name = parsed.shift
|
|
+ path << name
|
|
+ path << "("
|
|
+ parsed.shift.each_with_index do |argument, i|
|
|
+ path << ", " if i > 0
|
|
+ path << predicate_to_path(argument, &block)
|
|
+ end
|
|
+ path << ")"
|
|
when :literal
|
|
- path.shift
|
|
- string << " "
|
|
- string << path.shift.inspect
|
|
- string << " "
|
|
+ parsed.shift
|
|
+ path << quote_literal(parsed.shift)
|
|
else
|
|
- string << " "
|
|
- string << yield( path )
|
|
- string << " "
|
|
+ path << yield( parsed )
|
|
end
|
|
- return string.squeeze(" ")
|
|
+ return path.squeeze(" ")
|
|
end
|
|
+ # For backward compatibility
|
|
+ alias_method :preciate_to_string, :predicate_to_path
|
|
|
|
private
|
|
+ def quote_literal( literal )
|
|
+ case literal
|
|
+ when String
|
|
+ # XPath 1.0 does not support escape characters.
|
|
+ # Assumes literal does not contain both single and double quotes.
|
|
+ if literal.include?("'")
|
|
+ "\"#{literal}\""
|
|
+ else
|
|
+ "'#{literal}'"
|
|
+ end
|
|
+ else
|
|
+ literal.inspect
|
|
+ end
|
|
+ end
|
|
+
|
|
#LocationPath
|
|
# | RelativeLocationPath
|
|
# | '/' RelativeLocationPath?
|
|
# | '//' RelativeLocationPath
|
|
def LocationPath path, parsed
|
|
- path = path.strip
|
|
+ path = path.lstrip
|
|
if path[0] == ?/
|
|
parsed << :document
|
|
if path[1] == ?/
|
|
@@ -209,7 +265,12 @@ module REXML
|
|
# | RelativeLocationPath '//' Step
|
|
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
|
|
def RelativeLocationPath path, parsed
|
|
- while path.size > 0
|
|
+ loop do
|
|
+ original_path = path
|
|
+ path = path.lstrip
|
|
+
|
|
+ return original_path if path.empty?
|
|
+
|
|
# (axis or @ or <child::>) nodetest predicate >
|
|
# OR > / Step
|
|
# (. or ..) >
|
|
@@ -224,43 +285,44 @@ module REXML
|
|
path = path[1..-1]
|
|
end
|
|
else
|
|
+ path_before_axis_specifier = path
|
|
+ parsed_not_abberviated = []
|
|
if path[0] == ?@
|
|
- parsed << :attribute
|
|
+ parsed_not_abberviated << :attribute
|
|
path = path[1..-1]
|
|
# Goto Nodetest
|
|
elsif path =~ AXIS
|
|
- parsed << $1.tr('-','_').intern
|
|
+ parsed_not_abberviated << $1.tr('-','_').intern
|
|
path = $'
|
|
# Goto Nodetest
|
|
else
|
|
- parsed << :child
|
|
+ parsed_not_abberviated << :child
|
|
end
|
|
|
|
- n = []
|
|
- path = NodeTest( path, n)
|
|
-
|
|
- if path[0] == ?[
|
|
- path = Predicate( path, n )
|
|
+ path_before_node_test = path
|
|
+ path = NodeTest(path, parsed_not_abberviated)
|
|
+ if path == path_before_node_test
|
|
+ return path_before_axis_specifier
|
|
end
|
|
+ path = Predicate(path, parsed_not_abberviated)
|
|
|
|
- parsed.concat(n)
|
|
+ parsed.concat(parsed_not_abberviated)
|
|
end
|
|
|
|
- if path.size > 0
|
|
- if path[0] == ?/
|
|
- if path[1] == ?/
|
|
- parsed << :descendant_or_self
|
|
- parsed << :node
|
|
- path = path[2..-1]
|
|
- else
|
|
- path = path[1..-1]
|
|
- end
|
|
- else
|
|
- return path
|
|
- end
|
|
+ original_path = path
|
|
+ path = path.lstrip
|
|
+ return original_path if path.empty?
|
|
+
|
|
+ return original_path if path[0] != ?/
|
|
+
|
|
+ if path[1] == ?/
|
|
+ parsed << :descendant_or_self
|
|
+ parsed << :node
|
|
+ path = path[2..-1]
|
|
+ else
|
|
+ path = path[1..-1]
|
|
end
|
|
end
|
|
- return path
|
|
end
|
|
|
|
# Returns a 1-1 map of the nodeset
|
|
@@ -269,15 +331,26 @@ module REXML
|
|
# String, if a name match
|
|
#NodeTest
|
|
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
|
|
- # | NODE_TYPE '(' ')' NodeType
|
|
+ # | '*' ':' NCNAME NameTest since XPath 2.0
|
|
+ # | NODE_TYPE '(' ')' NodeType
|
|
# | PI '(' LITERAL ')' PI
|
|
# | '[' expr ']' Predicate
|
|
- NCNAMETEST= /^(#{NCNAME_STR}):\*/u
|
|
+ PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u
|
|
+ LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u
|
|
QNAME = Namespace::NAMESPLIT
|
|
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
|
|
PI = /^processing-instruction\(/
|
|
def NodeTest path, parsed
|
|
+ original_path = path
|
|
+ path = path.lstrip
|
|
case path
|
|
+ when PREFIX_WILDCARD
|
|
+ prefix = nil
|
|
+ name = $1
|
|
+ path = $'
|
|
+ parsed << :qname
|
|
+ parsed << prefix
|
|
+ parsed << name
|
|
when /^\*/
|
|
path = $'
|
|
parsed << :any
|
|
@@ -288,7 +361,9 @@ module REXML
|
|
when PI
|
|
path = $'
|
|
literal = nil
|
|
- if path !~ /^\s*\)/
|
|
+ if path =~ /^\s*\)/
|
|
+ path = $'
|
|
+ else
|
|
path =~ LITERAL
|
|
literal = $1
|
|
path = $'
|
|
@@ -297,7 +372,7 @@ module REXML
|
|
end
|
|
parsed << :processing_instruction
|
|
parsed << (literal || '')
|
|
- when NCNAMETEST
|
|
+ when LOCAL_NAME_WILDCARD
|
|
prefix = $1
|
|
path = $'
|
|
parsed << :namespace
|
|
@@ -310,13 +385,17 @@ module REXML
|
|
parsed << :qname
|
|
parsed << prefix
|
|
parsed << name
|
|
+ else
|
|
+ path = original_path
|
|
end
|
|
return path
|
|
end
|
|
|
|
# Filters the supplied nodeset on the predicate(s)
|
|
def Predicate path, parsed
|
|
- return nil unless path[0] == ?[
|
|
+ original_path = path
|
|
+ path = path.lstrip
|
|
+ return original_path unless path[0] == ?[
|
|
predicates = []
|
|
while path[0] == ?[
|
|
path, expr = get_group(path)
|
|
@@ -421,13 +500,13 @@ module REXML
|
|
rest
|
|
end
|
|
|
|
- #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
|
|
+ #| AdditiveExpr ('+' | '-') MultiplicativeExpr
|
|
#| MultiplicativeExpr
|
|
def AdditiveExpr path, parsed
|
|
n = []
|
|
rest = MultiplicativeExpr( path, n )
|
|
if rest != path
|
|
- while rest =~ /^\s*(\+| -)\s*/
|
|
+ while rest =~ /^\s*(\+|-)\s*/
|
|
if $1[0] == ?+
|
|
n = [ :plus, n, [] ]
|
|
else
|
|
@@ -509,13 +588,14 @@ module REXML
|
|
#| LocationPath
|
|
#| FilterExpr ('/' | '//') RelativeLocationPath
|
|
def PathExpr path, parsed
|
|
- path =~ /^\s*/
|
|
- path = $'
|
|
+ path = path.lstrip
|
|
n = []
|
|
rest = FilterExpr( path, n )
|
|
if rest != path
|
|
if rest and rest[0] == ?/
|
|
- return RelativeLocationPath(rest, n)
|
|
+ rest = RelativeLocationPath(rest, n)
|
|
+ parsed.concat(n)
|
|
+ return rest
|
|
end
|
|
end
|
|
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
|
|
@@ -527,8 +607,10 @@ module REXML
|
|
#| PrimaryExpr
|
|
def FilterExpr path, parsed
|
|
n = []
|
|
- path = PrimaryExpr( path, n )
|
|
- path = Predicate(path, n) if path and path[0] == ?[
|
|
+ path_before_primary_expr = path
|
|
+ path = PrimaryExpr(path, n)
|
|
+ return path_before_primary_expr if path == path_before_primary_expr
|
|
+ path = Predicate(path, n)
|
|
parsed.concat(n)
|
|
path
|
|
end
|
|
diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb
|
|
index 5d6c77c..a0466b2 100644
|
|
--- a/lib/rexml/quickpath.rb
|
|
+++ b/lib/rexml/quickpath.rb
|
|
@@ -1,6 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/functions'
|
|
-require 'rexml/xmltokens'
|
|
+require_relative 'functions'
|
|
+require_relative 'xmltokens'
|
|
|
|
module REXML
|
|
class QuickPath
|
|
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
|
|
index 92e689b..3af03ec 100644
|
|
--- a/lib/rexml/rexml.rb
|
|
+++ b/lib/rexml/rexml.rb
|
|
@@ -1,31 +1,38 @@
|
|
-# -*- encoding: utf-8 -*-
|
|
+# -*- coding: utf-8 -*-
|
|
# frozen_string_literal: false
|
|
-# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
|
|
-#
|
|
-# REXML is a _pure_ Ruby, XML 1.0 conforming,
|
|
-# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
|
|
-# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
|
|
-# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
|
|
-# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
|
|
-# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
|
|
-# Ruby 1.8, REXML is included in the standard Ruby distribution.
|
|
-#
|
|
-# Main page:: http://www.germane-software.com/software/rexml
|
|
-# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
|
-# Date:: 2008/019
|
|
-# Version:: 3.1.7.3
|
|
-#
|
|
-# This API documentation can be downloaded from the REXML home page, or can
|
|
-# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
|
-#
|
|
-# A tutorial is available in the REXML distribution in docs/tutorial.html,
|
|
-# or can be accessed
|
|
-# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
|
+#
|
|
+# \Module \REXML provides classes and methods for parsing,
|
|
+# editing, and generating XML.
|
|
+#
|
|
+# == Implementation
|
|
+#
|
|
+# \REXML:
|
|
+# - Is pure Ruby.
|
|
+# - Provides tree, stream, SAX2, pull, and lightweight APIs.
|
|
+# - Conforms to {XML version 1.0}[https://www.w3.org/TR/REC-xml/].
|
|
+# - Fully implements {XPath version 1.0}[http://www.w3c.org/tr/xpath].
|
|
+# - Is {non-validating}[https://www.w3.org/TR/xml/].
|
|
+# - Passes 100% of the non-validating {Oasis tests}[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml].
|
|
+#
|
|
+# == In a Hurry?
|
|
+#
|
|
+# If you're somewhat familiar with XML
|
|
+# and have a particular task in mind,
|
|
+# you may want to see {the tasks pages}[doc/rexml/tasks/tocs/master_toc_rdoc.html].
|
|
+#
|
|
+# == API
|
|
+#
|
|
+# Among the most important classes for using \REXML are:
|
|
+# - REXML::Document.
|
|
+# - REXML::Element.
|
|
+#
|
|
+# There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
|
|
+#
|
|
module REXML
|
|
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
|
|
DATE = "2008/019"
|
|
- VERSION = "3.1.7.4"
|
|
- REVISION = %w$Revision: 53141 $[1] || ''
|
|
+ VERSION = "3.3.1"
|
|
+ REVISION = ""
|
|
|
|
Copyright = COPYRIGHT
|
|
Version = VERSION
|
|
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
|
|
index af65cf4..5715c35 100644
|
|
--- a/lib/rexml/source.rb
|
|
+++ b/lib/rexml/source.rb
|
|
@@ -1,8 +1,28 @@
|
|
# coding: US-ASCII
|
|
# frozen_string_literal: false
|
|
-require 'rexml/encoding'
|
|
+
|
|
+require "strscan"
|
|
+
|
|
+require_relative 'encoding'
|
|
|
|
module REXML
|
|
+ if StringScanner::Version < "1.0.0"
|
|
+ module StringScannerCheckScanString
|
|
+ refine StringScanner do
|
|
+ def check(pattern)
|
|
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
|
+ super(pattern)
|
|
+ end
|
|
+
|
|
+ def scan(pattern)
|
|
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
|
+ super(pattern)
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ using StringScannerCheckScanString
|
|
+ end
|
|
+
|
|
# Generates Source-s. USE THIS CLASS.
|
|
class SourceFactory
|
|
# Generates a Source object
|
|
@@ -30,18 +50,27 @@ module REXML
|
|
# objects and provides consumption of text
|
|
class Source
|
|
include Encoding
|
|
- # The current buffer (what we're going to read next)
|
|
- attr_reader :buffer
|
|
# The line number of the last consumed text
|
|
attr_reader :line
|
|
attr_reader :encoding
|
|
|
|
+ module Private
|
|
+ SCANNER_RESET_SIZE = 100000
|
|
+ PRE_DEFINED_TERM_PATTERNS = {}
|
|
+ pre_defined_terms = ["'", '"', "<"]
|
|
+ pre_defined_terms.each do |term|
|
|
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
|
+ end
|
|
+ end
|
|
+ private_constant :Private
|
|
+
|
|
# Constructor
|
|
# @param arg must be a String, and should be a valid XML document
|
|
# @param encoding if non-null, sets the encoding of the source to this
|
|
# value, overriding all encoding detection
|
|
def initialize(arg, encoding=nil)
|
|
- @orig = @buffer = arg
|
|
+ @orig = arg
|
|
+ @scanner = StringScanner.new(@orig)
|
|
if encoding
|
|
self.encoding = encoding
|
|
else
|
|
@@ -50,6 +79,20 @@ module REXML
|
|
@line = 0
|
|
end
|
|
|
|
+ # The current buffer (what we're going to read next)
|
|
+ def buffer
|
|
+ @scanner.rest
|
|
+ end
|
|
+
|
|
+ def drop_parsed_content
|
|
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
|
|
+ @scanner.string = @scanner.rest
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def buffer_encoding=(encoding)
|
|
+ @scanner.string.force_encoding(encoding)
|
|
+ end
|
|
|
|
# Inherited from Encoding
|
|
# Overridden to support optimized en/decoding
|
|
@@ -58,98 +101,78 @@ module REXML
|
|
encoding_updated
|
|
end
|
|
|
|
- # Scans the source for a given pattern. Note, that this is not your
|
|
- # usual scan() method. For one thing, the pattern argument has some
|
|
- # requirements; for another, the source can be consumed. You can easily
|
|
- # confuse this method. Originally, the patterns were easier
|
|
- # to construct and this method more robust, because this method
|
|
- # generated search regexps on the fly; however, this was
|
|
- # computationally expensive and slowed down the entire REXML package
|
|
- # considerably, since this is by far the most commonly called method.
|
|
- # @param pattern must be a Regexp, and must be in the form of
|
|
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
|
- # will be returned; the second group is used if the consume flag is
|
|
- # set.
|
|
- # @param consume if true, the pattern returned will be consumed, leaving
|
|
- # everything after it in the Source.
|
|
- # @return the pattern, if found, or nil if the Source is empty or the
|
|
- # pattern is not found.
|
|
- def scan(pattern, cons=false)
|
|
- return nil if @buffer.nil?
|
|
- rv = @buffer.scan(pattern)
|
|
- @buffer = $' if cons and rv.size>0
|
|
- rv
|
|
+ def read(term = nil)
|
|
end
|
|
|
|
- def read
|
|
+ def read_until(term)
|
|
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
|
+ data = @scanner.scan_until(pattern)
|
|
+ unless data
|
|
+ data = @scanner.rest
|
|
+ @scanner.pos = @scanner.string.bytesize
|
|
+ end
|
|
+ data
|
|
end
|
|
|
|
- def consume( pattern )
|
|
- @buffer = $' if pattern.match( @buffer )
|
|
+ def ensure_buffer
|
|
end
|
|
|
|
- def match_to( char, pattern )
|
|
- return pattern.match(@buffer)
|
|
+ def match(pattern, cons=false)
|
|
+ if cons
|
|
+ @scanner.scan(pattern).nil? ? nil : @scanner
|
|
+ else
|
|
+ @scanner.check(pattern).nil? ? nil : @scanner
|
|
+ end
|
|
end
|
|
|
|
- def match_to_consume( char, pattern )
|
|
- md = pattern.match(@buffer)
|
|
- @buffer = $'
|
|
- return md
|
|
+ def position
|
|
+ @scanner.pos
|
|
end
|
|
|
|
- def match(pattern, cons=false)
|
|
- md = pattern.match(@buffer)
|
|
- @buffer = $' if cons and md
|
|
- return md
|
|
+ def position=(pos)
|
|
+ @scanner.pos = pos
|
|
end
|
|
|
|
# @return true if the Source is exhausted
|
|
def empty?
|
|
- @buffer == ""
|
|
- end
|
|
-
|
|
- def position
|
|
- @orig.index( @buffer )
|
|
+ @scanner.eos?
|
|
end
|
|
|
|
# @return the current line in the source
|
|
def current_line
|
|
lines = @orig.split
|
|
- res = lines.grep @buffer[0..30]
|
|
+ res = lines.grep @scanner.rest[0..30]
|
|
res = res[-1] if res.kind_of? Array
|
|
lines.index( res ) if res
|
|
end
|
|
|
|
private
|
|
+
|
|
def detect_encoding
|
|
- buffer_encoding = @buffer.encoding
|
|
+ scanner_encoding = @scanner.rest.encoding
|
|
detected_encoding = "UTF-8"
|
|
begin
|
|
- @buffer.force_encoding("ASCII-8BIT")
|
|
- if @buffer[0, 2] == "\xfe\xff"
|
|
- @buffer[0, 2] = ""
|
|
+ @scanner.string.force_encoding("ASCII-8BIT")
|
|
+ if @scanner.scan(/\xfe\xff/n)
|
|
detected_encoding = "UTF-16BE"
|
|
- elsif @buffer[0, 2] == "\xff\xfe"
|
|
- @buffer[0, 2] = ""
|
|
+ elsif @scanner.scan(/\xff\xfe/n)
|
|
detected_encoding = "UTF-16LE"
|
|
- elsif @buffer[0, 3] == "\xef\xbb\xbf"
|
|
- @buffer[0, 3] = ""
|
|
+ elsif @scanner.scan(/\xef\xbb\xbf/n)
|
|
detected_encoding = "UTF-8"
|
|
end
|
|
ensure
|
|
- @buffer.force_encoding(buffer_encoding)
|
|
+ @scanner.string.force_encoding(scanner_encoding)
|
|
end
|
|
self.encoding = detected_encoding
|
|
end
|
|
|
|
def encoding_updated
|
|
if @encoding != 'UTF-8'
|
|
- @buffer = decode(@buffer)
|
|
+ @scanner.string = decode(@scanner.rest)
|
|
@to_utf = true
|
|
else
|
|
@to_utf = false
|
|
- @buffer.force_encoding ::Encoding::UTF_8
|
|
+ @scanner.string.force_encoding(::Encoding::UTF_8)
|
|
end
|
|
end
|
|
end
|
|
@@ -172,7 +195,7 @@ module REXML
|
|
end
|
|
|
|
if !@to_utf and
|
|
- @buffer.respond_to?(:force_encoding) and
|
|
+ @orig.respond_to?(:force_encoding) and
|
|
@source.respond_to?(:external_encoding) and
|
|
@source.external_encoding != ::Encoding::UTF_8
|
|
@force_utf8 = true
|
|
@@ -181,65 +204,62 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- def scan(pattern, cons=false)
|
|
- rv = super
|
|
- # You'll notice that this next section is very similar to the same
|
|
- # section in match(), but just a liiittle different. This is
|
|
- # because it is a touch faster to do it this way with scan()
|
|
- # than the way match() does it; enough faster to warrant duplicating
|
|
- # some code
|
|
- if rv.size == 0
|
|
- until @buffer =~ pattern or @source.nil?
|
|
- begin
|
|
- @buffer << readline
|
|
- rescue Iconv::IllegalSequence
|
|
- raise
|
|
- rescue
|
|
- @source = nil
|
|
- end
|
|
- end
|
|
- rv = super
|
|
- end
|
|
- rv.taint
|
|
- rv
|
|
- end
|
|
-
|
|
- def read
|
|
+ def read(term = nil)
|
|
+ term = encode(term) if term
|
|
begin
|
|
- @buffer << readline
|
|
+ @scanner << readline(term)
|
|
+ true
|
|
rescue Exception, NameError
|
|
@source = nil
|
|
+ false
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def read_until(term)
|
|
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
|
+ term = encode(term)
|
|
+ until str = @scanner.scan_until(pattern)
|
|
+ break if @source.nil?
|
|
+ break if @source.eof?
|
|
+ @scanner << readline(term)
|
|
+ end
|
|
+ if str
|
|
+ read if @scanner.eos? and !@source.eof?
|
|
+ str
|
|
+ else
|
|
+ rest = @scanner.rest
|
|
+ @scanner.pos = @scanner.string.bytesize
|
|
+ rest
|
|
end
|
|
end
|
|
|
|
- def consume( pattern )
|
|
- match( pattern, true )
|
|
+ def ensure_buffer
|
|
+ read if @scanner.eos? && @source
|
|
end
|
|
|
|
+ # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
|
+ # - ">"
|
|
+ # - "XXX>" (X is any string excluding '>')
|
|
def match( pattern, cons=false )
|
|
- rv = pattern.match(@buffer)
|
|
- @buffer = $' if cons and rv
|
|
- while !rv and @source
|
|
- begin
|
|
- @buffer << readline
|
|
- rv = pattern.match(@buffer)
|
|
- @buffer = $' if cons and rv
|
|
- rescue
|
|
- @source = nil
|
|
+ while true
|
|
+ if cons
|
|
+ md = @scanner.scan(pattern)
|
|
+ else
|
|
+ md = @scanner.check(pattern)
|
|
end
|
|
+ break if md
|
|
+ return nil if pattern.is_a?(String)
|
|
+ return nil if @source.nil?
|
|
+ return nil unless read
|
|
end
|
|
- rv.taint
|
|
- rv
|
|
+
|
|
+ md.nil? ? nil : @scanner
|
|
end
|
|
|
|
def empty?
|
|
super and ( @source.nil? || @source.eof? )
|
|
end
|
|
|
|
- def position
|
|
- @er_source.pos rescue 0
|
|
- end
|
|
-
|
|
# @return the current line in the source
|
|
def current_line
|
|
begin
|
|
@@ -254,6 +274,7 @@ module REXML
|
|
end
|
|
rescue
|
|
end
|
|
+ @er_source.seek(pos)
|
|
rescue IOError
|
|
pos = -1
|
|
line = -1
|
|
@@ -262,8 +283,8 @@ module REXML
|
|
end
|
|
|
|
private
|
|
- def readline
|
|
- str = @source.readline(@line_break)
|
|
+ def readline(term = nil)
|
|
+ str = @source.readline(term || @line_break)
|
|
if @pending_buffer
|
|
if str.nil?
|
|
str = @pending_buffer
|
|
@@ -289,7 +310,7 @@ module REXML
|
|
@source.set_encoding(@encoding, @encoding)
|
|
end
|
|
@line_break = encode(">")
|
|
- @pending_buffer, @buffer = @buffer, ""
|
|
+ @pending_buffer, @scanner.string = @scanner.rest, ""
|
|
@pending_buffer.force_encoding(@encoding)
|
|
super
|
|
end
|
|
diff --git a/lib/rexml/syncenumerator.rb b/lib/rexml/syncenumerator.rb
|
|
deleted file mode 100644
|
|
index a9d2ad7..0000000
|
|
--- a/lib/rexml/syncenumerator.rb
|
|
+++ /dev/null
|
|
@@ -1,33 +0,0 @@
|
|
-# frozen_string_literal: false
|
|
-module REXML
|
|
- class SyncEnumerator
|
|
- include Enumerable
|
|
-
|
|
- # Creates a new SyncEnumerator which enumerates rows of given
|
|
- # Enumerable objects.
|
|
- def initialize(*enums)
|
|
- @gens = enums
|
|
- @length = @gens.collect {|x| x.size }.max
|
|
- end
|
|
-
|
|
- # Returns the number of enumerated Enumerable objects, i.e. the size
|
|
- # of each row.
|
|
- def size
|
|
- @gens.size
|
|
- end
|
|
-
|
|
- # Returns the number of enumerated Enumerable objects, i.e. the size
|
|
- # of each row.
|
|
- def length
|
|
- @gens.length
|
|
- end
|
|
-
|
|
- # Enumerates rows of the Enumerable objects.
|
|
- def each
|
|
- @length.times {|i|
|
|
- yield @gens.collect {|x| x[i]}
|
|
- }
|
|
- self
|
|
- end
|
|
- end
|
|
-end
|
|
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
|
|
index 86269de..b47bad3 100644
|
|
--- a/lib/rexml/text.rb
|
|
+++ b/lib/rexml/text.rb
|
|
@@ -1,10 +1,10 @@
|
|
-# frozen_string_literal: false
|
|
-require 'rexml/security'
|
|
-require 'rexml/entity'
|
|
-require 'rexml/doctype'
|
|
-require 'rexml/child'
|
|
-require 'rexml/doctype'
|
|
-require 'rexml/parseexception'
|
|
+# frozen_string_literal: true
|
|
+require_relative 'security'
|
|
+require_relative 'entity'
|
|
+require_relative 'doctype'
|
|
+require_relative 'child'
|
|
+require_relative 'doctype'
|
|
+require_relative 'parseexception'
|
|
|
|
module REXML
|
|
# Represents text nodes in an XML document
|
|
@@ -96,27 +96,28 @@ module REXML
|
|
|
|
@raw = false
|
|
@parent = nil
|
|
+ @entity_filter = nil
|
|
|
|
if parent
|
|
super( parent )
|
|
@raw = parent.raw
|
|
end
|
|
|
|
- @raw = raw unless raw.nil?
|
|
- @entity_filter = entity_filter
|
|
- clear_cache
|
|
-
|
|
if arg.kind_of? String
|
|
@string = arg.dup
|
|
- @string.squeeze!(" \n\t") unless respect_whitespace
|
|
elsif arg.kind_of? Text
|
|
- @string = arg.to_s
|
|
+ @string = arg.instance_variable_get(:@string).dup
|
|
@raw = arg.raw
|
|
- elsif
|
|
+ @entity_filter = arg.instance_variable_get(:@entity_filter)
|
|
+ else
|
|
raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
|
|
end
|
|
|
|
- @string.gsub!( /\r\n?/, "\n" )
|
|
+ @string.squeeze!(" \n\t") unless respect_whitespace
|
|
+ @string.gsub!(/\r\n?/, "\n")
|
|
+ @raw = raw unless raw.nil?
|
|
+ @entity_filter = entity_filter if entity_filter
|
|
+ clear_cache
|
|
|
|
Text.check(@string, illegal, doctype) if @raw
|
|
end
|
|
@@ -130,13 +131,13 @@ module REXML
|
|
def Text.check string, pattern, doctype
|
|
|
|
# illegal anywhere
|
|
- if string !~ VALID_XML_CHARS
|
|
+ if !string.match?(VALID_XML_CHARS)
|
|
if String.method_defined? :encode
|
|
string.chars.each do |c|
|
|
case c.ord
|
|
when *VALID_CHAR
|
|
else
|
|
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
|
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
|
end
|
|
end
|
|
else
|
|
@@ -144,7 +145,7 @@ module REXML
|
|
case c.unpack('U')
|
|
when *VALID_CHAR
|
|
else
|
|
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
|
|
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
|
end
|
|
end
|
|
end
|
|
@@ -153,13 +154,13 @@ module REXML
|
|
# context sensitive
|
|
string.scan(pattern) do
|
|
if $1[-1] != ?;
|
|
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
|
+ raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
|
|
elsif $1[0] == ?&
|
|
if $5 and $5[0] == ?#
|
|
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
|
|
when *VALID_CHAR
|
|
else
|
|
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
|
|
+ raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
|
|
end
|
|
# FIXME: below can't work but this needs API change.
|
|
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
|
@@ -181,7 +182,7 @@ module REXML
|
|
|
|
|
|
def clone
|
|
- return Text.new(self)
|
|
+ return Text.new(self, true)
|
|
end
|
|
|
|
|
|
@@ -226,9 +227,7 @@ module REXML
|
|
# u.to_s #-> "sean russell"
|
|
def to_s
|
|
return @string if @raw
|
|
- return @normalized if @normalized
|
|
-
|
|
- @normalized = Text::normalize( @string, doctype, @entity_filter )
|
|
+ @normalized ||= Text::normalize( @string, doctype, @entity_filter )
|
|
end
|
|
|
|
def inspect
|
|
@@ -249,8 +248,7 @@ module REXML
|
|
# u = Text.new( "sean russell", false, nil, true )
|
|
# u.value #-> "sean russell"
|
|
def value
|
|
- return @unnormalized if @unnormalized
|
|
- @unnormalized = Text::unnormalize( @string, doctype )
|
|
+ @unnormalized ||= Text::unnormalize( @string, doctype )
|
|
end
|
|
|
|
# Sets the contents of this text node. This expects the text to be
|
|
@@ -266,16 +264,16 @@ module REXML
|
|
@raw = false
|
|
end
|
|
|
|
- def wrap(string, width, addnewline=false)
|
|
- # Recursively wrap string at width.
|
|
- return string if string.length <= width
|
|
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
|
|
- if addnewline then
|
|
- return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
|
- else
|
|
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
|
- end
|
|
- end
|
|
+ def wrap(string, width, addnewline=false)
|
|
+ # Recursively wrap string at width.
|
|
+ return string if string.length <= width
|
|
+ place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
|
|
+ if addnewline then
|
|
+ return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
|
+ else
|
|
+ return string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
|
+ end
|
|
+ end
|
|
|
|
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
|
return string if level < 0
|
|
@@ -373,7 +371,7 @@ module REXML
|
|
copy = input.to_s
|
|
# Doing it like this rather than in a loop improves the speed
|
|
#copy = copy.gsub( EREFERENCE, '&' )
|
|
- copy = copy.gsub( "&", "&" )
|
|
+ copy = copy.gsub( "&", "&" ) if copy.include?("&")
|
|
if doctype
|
|
# Replace all ampersands that aren't part of an entity
|
|
doctype.entities.each_value do |entity|
|
|
@@ -384,7 +382,9 @@ module REXML
|
|
else
|
|
# Replace all ampersands that aren't part of an entity
|
|
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
|
- copy = copy.gsub(entity.value, "&#{entity.name};" )
|
|
+ if copy.include?(entity.value)
|
|
+ copy = copy.gsub(entity.value, "&#{entity.name};" )
|
|
+ end
|
|
end
|
|
end
|
|
copy
|
|
diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb
|
|
index e522ed5..492a098 100644
|
|
--- a/lib/rexml/undefinednamespaceexception.rb
|
|
+++ b/lib/rexml/undefinednamespaceexception.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/parseexception'
|
|
+require_relative 'parseexception'
|
|
module REXML
|
|
class UndefinedNamespaceException < ParseException
|
|
def initialize( prefix, source, parser )
|
|
diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb
|
|
index fb52438..f29a2c0 100644
|
|
--- a/lib/rexml/validation/relaxng.rb
|
|
+++ b/lib/rexml/validation/relaxng.rb
|
|
@@ -1,6 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require "rexml/validation/validation"
|
|
-require "rexml/parsers/baseparser"
|
|
+require_relative "validation"
|
|
+require_relative "../parsers/baseparser"
|
|
|
|
module REXML
|
|
module Validation
|
|
diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb
|
|
index f0c76f9..0ad6ada 100644
|
|
--- a/lib/rexml/validation/validation.rb
|
|
+++ b/lib/rexml/validation/validation.rb
|
|
@@ -1,5 +1,5 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/validation/validationexception'
|
|
+require_relative 'validationexception'
|
|
|
|
module REXML
|
|
module Validation
|
|
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
|
|
index a37e9f3..d19407c 100644
|
|
--- a/lib/rexml/xmldecl.rb
|
|
+++ b/lib/rexml/xmldecl.rb
|
|
@@ -1,17 +1,18 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/encoding'
|
|
-require 'rexml/source'
|
|
+
|
|
+require_relative 'encoding'
|
|
+require_relative 'source'
|
|
|
|
module REXML
|
|
# NEEDS DOCUMENTATION
|
|
class XMLDecl < Child
|
|
include Encoding
|
|
|
|
- DEFAULT_VERSION = "1.0";
|
|
- DEFAULT_ENCODING = "UTF-8";
|
|
- DEFAULT_STANDALONE = "no";
|
|
- START = '<\?xml';
|
|
- STOP = '\?>';
|
|
+ DEFAULT_VERSION = "1.0"
|
|
+ DEFAULT_ENCODING = "UTF-8"
|
|
+ DEFAULT_STANDALONE = "no"
|
|
+ START = "<?xml"
|
|
+ STOP = "?>"
|
|
|
|
attr_accessor :version, :standalone
|
|
attr_reader :writeencoding, :writethis
|
|
@@ -25,6 +26,7 @@ module REXML
|
|
self.encoding = version.encoding
|
|
@writeencoding = version.writeencoding
|
|
@standalone = version.standalone
|
|
+ @writethis = version.writethis
|
|
else
|
|
super()
|
|
@version = version
|
|
@@ -46,9 +48,9 @@ module REXML
|
|
# Ignored
|
|
def write(writer, indent=-1, transitive=false, ie_hack=false)
|
|
return nil unless @writethis or writer.kind_of? Output
|
|
- writer << START.sub(/\\/u, '')
|
|
+ writer << START
|
|
writer << " #{content encoding}"
|
|
- writer << STOP.sub(/\\/u, '')
|
|
+ writer << STOP
|
|
end
|
|
|
|
def ==( other )
|
|
@@ -102,14 +104,26 @@ module REXML
|
|
end
|
|
|
|
def inspect
|
|
- START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
|
|
+ "#{START} ... #{STOP}"
|
|
end
|
|
|
|
private
|
|
def content(enc)
|
|
- rv = "version='#@version'"
|
|
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i
|
|
- rv << " standalone='#@standalone'" if @standalone
|
|
+ context = nil
|
|
+ context = parent.context if parent
|
|
+ if context and context[:prologue_quote] == :quote
|
|
+ quote = "\""
|
|
+ else
|
|
+ quote = "'"
|
|
+ end
|
|
+
|
|
+ rv = "version=#{quote}#{@version}#{quote}"
|
|
+ if @writeencoding or enc !~ /\Autf-8\z/i
|
|
+ rv << " encoding=#{quote}#{enc}#{quote}"
|
|
+ end
|
|
+ if @standalone
|
|
+ rv << " standalone=#{quote}#{@standalone}#{quote}"
|
|
+ end
|
|
rv
|
|
end
|
|
end
|
|
diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
|
|
index f1cb99b..a0921bd 100644
|
|
--- a/lib/rexml/xpath.rb
|
|
+++ b/lib/rexml/xpath.rb
|
|
@@ -1,6 +1,6 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/functions'
|
|
-require 'rexml/xpath_parser'
|
|
+require_relative 'functions'
|
|
+require_relative 'xpath_parser'
|
|
|
|
module REXML
|
|
# Wrapper class. Use this class to access the XPath functions.
|
|
@@ -28,10 +28,10 @@ module REXML
|
|
# XPath.first( doc, "//b"} )
|
|
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
|
|
# XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
|
|
- def XPath::first element, path=nil, namespaces=nil, variables={}
|
|
+ def XPath::first(element, path=nil, namespaces=nil, variables={}, options={})
|
|
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
|
|
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
|
|
- parser = XPathParser.new
|
|
+ parser = XPathParser.new(**options)
|
|
parser.namespaces = namespaces
|
|
parser.variables = variables
|
|
path = "*" unless path
|
|
@@ -57,10 +57,10 @@ module REXML
|
|
# XPath.each( node, 'ancestor::x' ) { |el| ... }
|
|
# XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
|
|
# {|el| ... }
|
|
- def XPath::each element, path=nil, namespaces=nil, variables={}, &block
|
|
+ def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block)
|
|
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
|
|
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
|
|
- parser = XPathParser.new
|
|
+ parser = XPathParser.new(**options)
|
|
parser.namespaces = namespaces
|
|
parser.variables = variables
|
|
path = "*" unless path
|
|
@@ -69,8 +69,8 @@ module REXML
|
|
end
|
|
|
|
# Returns an array of nodes matching a given XPath.
|
|
- def XPath::match element, path=nil, namespaces=nil, variables={}
|
|
- parser = XPathParser.new
|
|
+ def XPath::match(element, path=nil, namespaces=nil, variables={}, options={})
|
|
+ parser = XPathParser.new(**options)
|
|
parser.namespaces = namespaces
|
|
parser.variables = variables
|
|
path = "*" unless path
|
|
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
|
|
index 181b2b6..5eb1e5a 100644
|
|
--- a/lib/rexml/xpath_parser.rb
|
|
+++ b/lib/rexml/xpath_parser.rb
|
|
@@ -1,43 +1,51 @@
|
|
# frozen_string_literal: false
|
|
-require 'rexml/namespace'
|
|
-require 'rexml/xmltokens'
|
|
-require 'rexml/attribute'
|
|
-require 'rexml/syncenumerator'
|
|
-require 'rexml/parsers/xpathparser'
|
|
-
|
|
-class Object
|
|
- # provides a unified +clone+ operation, for REXML::XPathParser
|
|
- # to use across multiple Object types
|
|
- def dclone
|
|
- clone
|
|
- end
|
|
-end
|
|
-class Symbol
|
|
- # provides a unified +clone+ operation, for REXML::XPathParser
|
|
- # to use across multiple Object types
|
|
- def dclone ; self ; end
|
|
-end
|
|
-class Integer
|
|
- # provides a unified +clone+ operation, for REXML::XPathParser
|
|
- # to use across multiple Object types
|
|
- def dclone ; self ; end
|
|
-end
|
|
-class Float
|
|
- # provides a unified +clone+ operation, for REXML::XPathParser
|
|
- # to use across multiple Object types
|
|
- def dclone ; self ; end
|
|
-end
|
|
-class Array
|
|
- # provides a unified +clone+ operation, for REXML::XPathParser
|
|
- # to use across multiple Object+ types
|
|
- def dclone
|
|
- klone = self.clone
|
|
- klone.clear
|
|
- self.each{|v| klone << v.dclone}
|
|
- klone
|
|
+
|
|
+require "pp"
|
|
+
|
|
+require_relative 'namespace'
|
|
+require_relative 'xmltokens'
|
|
+require_relative 'attribute'
|
|
+require_relative 'parsers/xpathparser'
|
|
+
|
|
+module REXML
|
|
+ module DClonable
|
|
+ refine Object do
|
|
+ # provides a unified +clone+ operation, for REXML::XPathParser
|
|
+ # to use across multiple Object types
|
|
+ def dclone
|
|
+ clone
|
|
+ end
|
|
+ end
|
|
+ refine Symbol do
|
|
+ # provides a unified +clone+ operation, for REXML::XPathParser
|
|
+ # to use across multiple Object types
|
|
+ def dclone ; self ; end
|
|
+ end
|
|
+ refine Integer do
|
|
+ # provides a unified +clone+ operation, for REXML::XPathParser
|
|
+ # to use across multiple Object types
|
|
+ def dclone ; self ; end
|
|
+ end
|
|
+ refine Float do
|
|
+ # provides a unified +clone+ operation, for REXML::XPathParser
|
|
+ # to use across multiple Object types
|
|
+ def dclone ; self ; end
|
|
+ end
|
|
+ refine Array do
|
|
+ # provides a unified +clone+ operation, for REXML::XPathParser
|
|
+ # to use across multiple Object+ types
|
|
+ def dclone
|
|
+ klone = self.clone
|
|
+ klone.clear
|
|
+ self.each{|v| klone << v.dclone}
|
|
+ klone
|
|
+ end
|
|
+ end
|
|
end
|
|
end
|
|
|
|
+using REXML::DClonable
|
|
+
|
|
module REXML
|
|
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
# for this class. Believe me. You don't want to poke around in here.
|
|
@@ -47,10 +55,15 @@ module REXML
|
|
include XMLTokens
|
|
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
|
|
|
- def initialize( )
|
|
+ DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true")
|
|
+
|
|
+ def initialize(strict: false)
|
|
+ @debug = DEBUG
|
|
@parser = REXML::Parsers::XPathParser.new
|
|
@namespaces = nil
|
|
@variables = {}
|
|
+ @nest = 0
|
|
+ @strict = strict
|
|
end
|
|
|
|
def namespaces=( namespaces={} )
|
|
@@ -75,7 +88,7 @@ module REXML
|
|
|
|
def predicate path, nodeset
|
|
path_stack = @parser.parse( path )
|
|
- expr( path_stack, nodeset )
|
|
+ match( path_stack, nodeset )
|
|
end
|
|
|
|
def []=( variable_name, value )
|
|
@@ -123,13 +136,24 @@ module REXML
|
|
end
|
|
|
|
|
|
- def match( path_stack, nodeset )
|
|
- r = expr( path_stack, nodeset )
|
|
- r
|
|
+ def match(path_stack, nodeset)
|
|
+ nodeset = nodeset.collect.with_index do |node, i|
|
|
+ position = i + 1
|
|
+ XPathNode.new(node, position: position)
|
|
+ end
|
|
+ result = expr(path_stack, nodeset)
|
|
+ case result
|
|
+ when Array # nodeset
|
|
+ unnode(result)
|
|
+ else
|
|
+ [result]
|
|
+ end
|
|
end
|
|
|
|
private
|
|
-
|
|
+ def strict?
|
|
+ @strict
|
|
+ end
|
|
|
|
# Returns a String namespace for a node, given a prefix
|
|
# The rules are:
|
|
@@ -148,343 +172,481 @@ module REXML
|
|
|
|
# Expr takes a stack of path elements and a set of nodes (either a Parent
|
|
# or an Array and returns an Array of matching nodes
|
|
- ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
|
|
- ELEMENTS = [ :element ]
|
|
def expr( path_stack, nodeset, context=nil )
|
|
- node_types = ELEMENTS
|
|
+ enter(:expr, path_stack, nodeset) if @debug
|
|
return nodeset if path_stack.length == 0 || nodeset.length == 0
|
|
while path_stack.length > 0
|
|
+ trace(:while, path_stack, nodeset) if @debug
|
|
if nodeset.length == 0
|
|
path_stack.clear
|
|
return []
|
|
end
|
|
- case (op = path_stack.shift)
|
|
+ op = path_stack.shift
|
|
+ case op
|
|
when :document
|
|
- nodeset = [ nodeset[0].root_node ]
|
|
-
|
|
- when :qname
|
|
- prefix = path_stack.shift
|
|
- name = path_stack.shift
|
|
- nodeset.delete_if do |node|
|
|
- # FIXME: This DOUBLES the time XPath searches take
|
|
- ns = get_namespace( node, prefix )
|
|
- if node.node_type == :element
|
|
- if node.name == name
|
|
- end
|
|
- end
|
|
- !(node.node_type == :element and
|
|
- node.name == name and
|
|
- node.namespace == ns )
|
|
- end
|
|
- node_types = ELEMENTS
|
|
-
|
|
- when :any
|
|
- nodeset.delete_if { |node| !node_types.include?(node.node_type) }
|
|
-
|
|
+ first_raw_node = nodeset.first.raw_node
|
|
+ nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
|
|
when :self
|
|
- # This space left intentionally blank
|
|
-
|
|
- when :processing_instruction
|
|
- target = path_stack.shift
|
|
- nodeset.delete_if do |node|
|
|
- (node.node_type != :processing_instruction) or
|
|
- ( target!='' and ( node.target != target ) )
|
|
+ nodeset = step(path_stack) do
|
|
+ [nodeset]
|
|
end
|
|
-
|
|
- when :text
|
|
- nodeset.delete_if { |node| node.node_type != :text }
|
|
-
|
|
- when :comment
|
|
- nodeset.delete_if { |node| node.node_type != :comment }
|
|
-
|
|
- when :node
|
|
- # This space left intentionally blank
|
|
- node_types = ALL
|
|
-
|
|
when :child
|
|
- new_nodeset = []
|
|
- nt = nil
|
|
- nodeset.each do |node|
|
|
- nt = node.node_type
|
|
- new_nodeset += node.children if nt == :element or nt == :document
|
|
+ nodeset = step(path_stack) do
|
|
+ child(nodeset)
|
|
end
|
|
- nodeset = new_nodeset
|
|
- node_types = ELEMENTS
|
|
-
|
|
when :literal
|
|
+ trace(:literal, path_stack, nodeset) if @debug
|
|
return path_stack.shift
|
|
-
|
|
when :attribute
|
|
- new_nodeset = []
|
|
- case path_stack.shift
|
|
- when :qname
|
|
- prefix = path_stack.shift
|
|
- name = path_stack.shift
|
|
- for element in nodeset
|
|
- if element.node_type == :element
|
|
- attrib = element.attribute( name, get_namespace(element, prefix) )
|
|
- new_nodeset << attrib if attrib
|
|
+ nodeset = step(path_stack, any_type: :attribute) do
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ next unless raw_node.node_type == :element
|
|
+ attributes = raw_node.attributes
|
|
+ next if attributes.empty?
|
|
+ nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
|
|
+ XPathNode.new(attribute, position: i + 1)
|
|
end
|
|
end
|
|
- when :any
|
|
- for element in nodeset
|
|
- if element.node_type == :element
|
|
- new_nodeset += element.attributes.to_a
|
|
+ nodesets
|
|
+ end
|
|
+ when :namespace
|
|
+ pre_defined_namespaces = {
|
|
+ "xml" => "http://www.w3.org/XML/1998/namespace",
|
|
+ }
|
|
+ nodeset = step(path_stack, any_type: :namespace) do
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ case raw_node.node_type
|
|
+ when :element
|
|
+ if @namespaces
|
|
+ nodesets << pre_defined_namespaces.merge(@namespaces)
|
|
+ else
|
|
+ nodesets << pre_defined_namespaces.merge(raw_node.namespaces)
|
|
+ end
|
|
+ when :attribute
|
|
+ if @namespaces
|
|
+ nodesets << pre_defined_namespaces.merge(@namespaces)
|
|
+ else
|
|
+ nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces)
|
|
+ end
|
|
end
|
|
end
|
|
+ nodesets
|
|
end
|
|
- nodeset = new_nodeset
|
|
-
|
|
when :parent
|
|
- nodeset = nodeset.collect{|n| n.parent}.compact
|
|
- #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
|
|
- node_types = ELEMENTS
|
|
-
|
|
- when :ancestor
|
|
- new_nodeset = []
|
|
- nodeset.each do |node|
|
|
- while node.parent
|
|
- node = node.parent
|
|
- new_nodeset << node unless new_nodeset.include? node
|
|
+ nodeset = step(path_stack) do
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ if raw_node.node_type == :attribute
|
|
+ parent = raw_node.element
|
|
+ else
|
|
+ parent = raw_node.parent
|
|
+ end
|
|
+ nodesets << [XPathNode.new(parent, position: 1)] if parent
|
|
end
|
|
+ nodesets
|
|
end
|
|
- nodeset = new_nodeset
|
|
- node_types = ELEMENTS
|
|
-
|
|
- when :ancestor_or_self
|
|
- new_nodeset = []
|
|
- nodeset.each do |node|
|
|
- if node.node_type == :element
|
|
- new_nodeset << node
|
|
- while ( node.parent )
|
|
- node = node.parent
|
|
- new_nodeset << node unless new_nodeset.include? node
|
|
+ when :ancestor
|
|
+ nodeset = step(path_stack) do
|
|
+ nodesets = []
|
|
+ # new_nodes = {}
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ new_nodeset = []
|
|
+ while raw_node.parent
|
|
+ raw_node = raw_node.parent
|
|
+ # next if new_nodes.key?(node)
|
|
+ new_nodeset << XPathNode.new(raw_node,
|
|
+ position: new_nodeset.size + 1)
|
|
+ # new_nodes[node] = true
|
|
end
|
|
+ nodesets << new_nodeset unless new_nodeset.empty?
|
|
end
|
|
+ nodesets
|
|
end
|
|
- nodeset = new_nodeset
|
|
- node_types = ELEMENTS
|
|
-
|
|
- when :predicate
|
|
- new_nodeset = []
|
|
- subcontext = { :size => nodeset.size }
|
|
- pred = path_stack.shift
|
|
- nodeset.each_with_index { |node, index|
|
|
- subcontext[ :node ] = node
|
|
- subcontext[ :index ] = index+1
|
|
- pc = pred.dclone
|
|
- result = expr( pc, [node], subcontext )
|
|
- result = result[0] if result.kind_of? Array and result.length == 1
|
|
- if result.kind_of? Numeric
|
|
- new_nodeset << node if result == (index+1)
|
|
- elsif result.instance_of? Array
|
|
- if result.size > 0 and result.inject(false) {|k,s| s or k}
|
|
- new_nodeset << node if result.size > 0
|
|
+ when :ancestor_or_self
|
|
+ nodeset = step(path_stack) do
|
|
+ nodesets = []
|
|
+ # new_nodes = {}
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ next unless raw_node.node_type == :element
|
|
+ new_nodeset = [XPathNode.new(raw_node, position: 1)]
|
|
+ # new_nodes[node] = true
|
|
+ while raw_node.parent
|
|
+ raw_node = raw_node.parent
|
|
+ # next if new_nodes.key?(node)
|
|
+ new_nodeset << XPathNode.new(raw_node,
|
|
+ position: new_nodeset.size + 1)
|
|
+ # new_nodes[node] = true
|
|
end
|
|
- else
|
|
- new_nodeset << node if result
|
|
+ nodesets << new_nodeset unless new_nodeset.empty?
|
|
end
|
|
- }
|
|
- nodeset = new_nodeset
|
|
-=begin
|
|
- predicate = path_stack.shift
|
|
- ns = nodeset.clone
|
|
- result = expr( predicate, ns )
|
|
- if result.kind_of? Array
|
|
- nodeset = result.zip(ns).collect{|m,n| n if m}.compact
|
|
- else
|
|
- nodeset = result ? nodeset : []
|
|
+ nodesets
|
|
end
|
|
-=end
|
|
-
|
|
when :descendant_or_self
|
|
- rv = descendant_or_self( path_stack, nodeset )
|
|
- path_stack.clear
|
|
- nodeset = rv
|
|
- node_types = ELEMENTS
|
|
-
|
|
+ nodeset = step(path_stack) do
|
|
+ descendant(nodeset, true)
|
|
+ end
|
|
when :descendant
|
|
- results = []
|
|
- nt = nil
|
|
- nodeset.each do |node|
|
|
- nt = node.node_type
|
|
- results += expr( path_stack.dclone.unshift( :descendant_or_self ),
|
|
- node.children ) if nt == :element or nt == :document
|
|
+ nodeset = step(path_stack) do
|
|
+ descendant(nodeset, false)
|
|
end
|
|
- nodeset = results
|
|
- node_types = ELEMENTS
|
|
-
|
|
when :following_sibling
|
|
- results = []
|
|
- nodeset.each do |node|
|
|
- next if node.parent.nil?
|
|
- all_siblings = node.parent.children
|
|
- current_index = all_siblings.index( node )
|
|
- following_siblings = all_siblings[ current_index+1 .. -1 ]
|
|
- results += expr( path_stack.dclone, following_siblings )
|
|
+ nodeset = step(path_stack) do
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ next unless raw_node.respond_to?(:parent)
|
|
+ next if raw_node.parent.nil?
|
|
+ all_siblings = raw_node.parent.children
|
|
+ current_index = all_siblings.index(raw_node)
|
|
+ following_siblings = all_siblings[(current_index + 1)..-1]
|
|
+ next if following_siblings.empty?
|
|
+ nodesets << following_siblings.collect.with_index do |sibling, i|
|
|
+ XPathNode.new(sibling, position: i + 1)
|
|
+ end
|
|
+ end
|
|
+ nodesets
|
|
end
|
|
- nodeset = results
|
|
-
|
|
when :preceding_sibling
|
|
- results = []
|
|
- nodeset.each do |node|
|
|
- next if node.parent.nil?
|
|
- all_siblings = node.parent.children
|
|
- current_index = all_siblings.index( node )
|
|
- preceding_siblings = all_siblings[ 0, current_index ].reverse
|
|
- results += preceding_siblings
|
|
+ nodeset = step(path_stack, order: :reverse) do
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ next unless raw_node.respond_to?(:parent)
|
|
+ next if raw_node.parent.nil?
|
|
+ all_siblings = raw_node.parent.children
|
|
+ current_index = all_siblings.index(raw_node)
|
|
+ preceding_siblings = all_siblings[0, current_index].reverse
|
|
+ next if preceding_siblings.empty?
|
|
+ nodesets << preceding_siblings.collect.with_index do |sibling, i|
|
|
+ XPathNode.new(sibling, position: i + 1)
|
|
+ end
|
|
+ end
|
|
+ nodesets
|
|
end
|
|
- nodeset = results
|
|
- node_types = ELEMENTS
|
|
-
|
|
when :preceding
|
|
- new_nodeset = []
|
|
- nodeset.each do |node|
|
|
- new_nodeset += preceding( node )
|
|
+ nodeset = step(path_stack, order: :reverse) do
|
|
+ unnode(nodeset) do |node|
|
|
+ preceding(node)
|
|
+ end
|
|
end
|
|
- nodeset = new_nodeset
|
|
- node_types = ELEMENTS
|
|
-
|
|
when :following
|
|
- new_nodeset = []
|
|
- nodeset.each do |node|
|
|
- new_nodeset += following( node )
|
|
- end
|
|
- nodeset = new_nodeset
|
|
- node_types = ELEMENTS
|
|
-
|
|
- when :namespace
|
|
- new_nodeset = []
|
|
- prefix = path_stack.shift
|
|
- nodeset.each do |node|
|
|
- if (node.node_type == :element or node.node_type == :attribute)
|
|
- if @namespaces
|
|
- namespaces = @namespaces
|
|
- elsif (node.node_type == :element)
|
|
- namespaces = node.namespaces
|
|
- else
|
|
- namespaces = node.element.namesapces
|
|
- end
|
|
- if (node.namespace == namespaces[prefix])
|
|
- new_nodeset << node
|
|
- end
|
|
+ nodeset = step(path_stack) do
|
|
+ unnode(nodeset) do |node|
|
|
+ following(node)
|
|
end
|
|
end
|
|
- nodeset = new_nodeset
|
|
-
|
|
when :variable
|
|
var_name = path_stack.shift
|
|
- return @variables[ var_name ]
|
|
+ return [@variables[var_name]]
|
|
|
|
- # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
|
|
- # TODO: Special case for :or and :and -- not evaluate the right
|
|
- # operand if the left alone determines result (i.e. is true for
|
|
- # :or and false for :and).
|
|
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
|
|
+ when :eq, :neq, :lt, :lteq, :gt, :gteq
|
|
left = expr( path_stack.shift, nodeset.dup, context )
|
|
right = expr( path_stack.shift, nodeset.dup, context )
|
|
res = equality_relational_compare( left, op, right )
|
|
+ trace(op, left, right, res) if @debug
|
|
return res
|
|
|
|
+ when :or
|
|
+ left = expr(path_stack.shift, nodeset.dup, context)
|
|
+ return true if Functions.boolean(left)
|
|
+ right = expr(path_stack.shift, nodeset.dup, context)
|
|
+ return Functions.boolean(right)
|
|
+
|
|
when :and
|
|
- left = expr( path_stack.shift, nodeset.dup, context )
|
|
- return [] unless left
|
|
- if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
|
|
- return []
|
|
+ left = expr(path_stack.shift, nodeset.dup, context)
|
|
+ return false unless Functions.boolean(left)
|
|
+ right = expr(path_stack.shift, nodeset.dup, context)
|
|
+ return Functions.boolean(right)
|
|
+
|
|
+ when :div, :mod, :mult, :plus, :minus
|
|
+ left = expr(path_stack.shift, nodeset, context)
|
|
+ right = expr(path_stack.shift, nodeset, context)
|
|
+ left = unnode(left) if left.is_a?(Array)
|
|
+ right = unnode(right) if right.is_a?(Array)
|
|
+ left = Functions::number(left)
|
|
+ right = Functions::number(right)
|
|
+ case op
|
|
+ when :div
|
|
+ return left / right
|
|
+ when :mod
|
|
+ return left % right
|
|
+ when :mult
|
|
+ return left * right
|
|
+ when :plus
|
|
+ return left + right
|
|
+ when :minus
|
|
+ return left - right
|
|
+ else
|
|
+ raise "[BUG] Unexpected operator: <#{op.inspect}>"
|
|
end
|
|
- right = expr( path_stack.shift, nodeset.dup, context )
|
|
- res = equality_relational_compare( left, op, right )
|
|
- return res
|
|
-
|
|
- when :div
|
|
- left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
|
|
- right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
|
|
- return (left / right)
|
|
-
|
|
- when :mod
|
|
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- return (left % right)
|
|
-
|
|
- when :mult
|
|
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- return (left * right)
|
|
-
|
|
- when :plus
|
|
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- return (left + right)
|
|
-
|
|
- when :minus
|
|
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
|
|
- return (left - right)
|
|
-
|
|
when :union
|
|
left = expr( path_stack.shift, nodeset, context )
|
|
right = expr( path_stack.shift, nodeset, context )
|
|
+ left = unnode(left) if left.is_a?(Array)
|
|
+ right = unnode(right) if right.is_a?(Array)
|
|
return (left | right)
|
|
-
|
|
when :neg
|
|
res = expr( path_stack, nodeset, context )
|
|
- return -(res.to_f)
|
|
-
|
|
+ res = unnode(res) if res.is_a?(Array)
|
|
+ return -Functions.number(res)
|
|
when :not
|
|
when :function
|
|
func_name = path_stack.shift.tr('-','_')
|
|
arguments = path_stack.shift
|
|
- subcontext = context ? nil : { :size => nodeset.size }
|
|
-
|
|
- res = []
|
|
- cont = context
|
|
- nodeset.each_with_index { |n, i|
|
|
- if subcontext
|
|
- subcontext[:node] = n
|
|
- subcontext[:index] = i
|
|
- cont = subcontext
|
|
+
|
|
+ if nodeset.size != 1
|
|
+ message = "[BUG] Node set size must be 1 for function call: "
|
|
+ message += "<#{func_name}>: <#{nodeset.inspect}>: "
|
|
+ message += "<#{arguments.inspect}>"
|
|
+ raise message
|
|
+ end
|
|
+
|
|
+ node = nodeset.first
|
|
+ if context
|
|
+ target_context = context
|
|
+ else
|
|
+ target_context = {:size => nodeset.size}
|
|
+ if node.is_a?(XPathNode)
|
|
+ target_context[:node] = node.raw_node
|
|
+ target_context[:index] = node.position
|
|
+ else
|
|
+ target_context[:node] = node
|
|
+ target_context[:index] = 1
|
|
end
|
|
- arg_clone = arguments.dclone
|
|
- args = arg_clone.collect { |arg|
|
|
- expr( arg, [n], cont )
|
|
- }
|
|
- Functions.context = cont
|
|
- res << Functions.send( func_name, *args )
|
|
- }
|
|
- return res
|
|
+ end
|
|
+ args = arguments.dclone.collect do |arg|
|
|
+ result = expr(arg, nodeset, target_context)
|
|
+ result = unnode(result) if result.is_a?(Array)
|
|
+ result
|
|
+ end
|
|
+ Functions.context = target_context
|
|
+ return Functions.send(func_name, *args)
|
|
|
|
+ else
|
|
+ raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
|
|
end
|
|
end # while
|
|
return nodeset
|
|
+ ensure
|
|
+ leave(:expr, path_stack, nodeset) if @debug
|
|
+ end
|
|
+
|
|
+ def step(path_stack, any_type: :element, order: :forward)
|
|
+ nodesets = yield
|
|
+ begin
|
|
+ enter(:step, path_stack, nodesets) if @debug
|
|
+ nodesets = node_test(path_stack, nodesets, any_type: any_type)
|
|
+ while path_stack[0] == :predicate
|
|
+ path_stack.shift # :predicate
|
|
+ predicate_expression = path_stack.shift.dclone
|
|
+ nodesets = evaluate_predicate(predicate_expression, nodesets)
|
|
+ end
|
|
+ if nodesets.size == 1
|
|
+ ordered_nodeset = nodesets[0]
|
|
+ else
|
|
+ raw_nodes = []
|
|
+ nodesets.each do |nodeset|
|
|
+ nodeset.each do |node|
|
|
+ if node.respond_to?(:raw_node)
|
|
+ raw_nodes << node.raw_node
|
|
+ else
|
|
+ raw_nodes << node
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ ordered_nodeset = sort(raw_nodes, order)
|
|
+ end
|
|
+ new_nodeset = []
|
|
+ ordered_nodeset.each do |node|
|
|
+ # TODO: Remove duplicated
|
|
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
|
|
+ end
|
|
+ new_nodeset
|
|
+ ensure
|
|
+ leave(:step, path_stack, new_nodeset) if @debug
|
|
+ end
|
|
end
|
|
|
|
+ def node_test(path_stack, nodesets, any_type: :element)
|
|
+ enter(:node_test, path_stack, nodesets) if @debug
|
|
+ operator = path_stack.shift
|
|
+ case operator
|
|
+ when :qname
|
|
+ prefix = path_stack.shift
|
|
+ name = path_stack.shift
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ case raw_node.node_type
|
|
+ when :element
|
|
+ if prefix.nil?
|
|
+ raw_node.name == name
|
|
+ elsif prefix.empty?
|
|
+ if strict?
|
|
+ raw_node.name == name and raw_node.namespace == ""
|
|
+ else
|
|
+ # FIXME: This DOUBLES the time XPath searches take
|
|
+ ns = get_namespace(raw_node, prefix)
|
|
+ raw_node.name == name and raw_node.namespace == ns
|
|
+ end
|
|
+ else
|
|
+ # FIXME: This DOUBLES the time XPath searches take
|
|
+ ns = get_namespace(raw_node, prefix)
|
|
+ raw_node.name == name and raw_node.namespace == ns
|
|
+ end
|
|
+ when :attribute
|
|
+ if prefix.nil?
|
|
+ raw_node.name == name
|
|
+ elsif prefix.empty?
|
|
+ raw_node.name == name and raw_node.namespace == ""
|
|
+ else
|
|
+ # FIXME: This DOUBLES the time XPath searches take
|
|
+ ns = get_namespace(raw_node.element, prefix)
|
|
+ raw_node.name == name and raw_node.namespace == ns
|
|
+ end
|
|
+ else
|
|
+ false
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ when :namespace
|
|
+ prefix = path_stack.shift
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ case raw_node.node_type
|
|
+ when :element
|
|
+ namespaces = @namespaces || raw_node.namespaces
|
|
+ raw_node.namespace == namespaces[prefix]
|
|
+ when :attribute
|
|
+ namespaces = @namespaces || raw_node.element.namespaces
|
|
+ raw_node.namespace == namespaces[prefix]
|
|
+ else
|
|
+ false
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ when :any
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ raw_node.node_type == any_type
|
|
+ end
|
|
+ end
|
|
+ when :comment
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ raw_node.node_type == :comment
|
|
+ end
|
|
+ end
|
|
+ when :text
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ raw_node.node_type == :text
|
|
+ end
|
|
+ end
|
|
+ when :processing_instruction
|
|
+ target = path_stack.shift
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ raw_node = node.raw_node
|
|
+ (raw_node.node_type == :processing_instruction) and
|
|
+ (target.empty? or (raw_node.target == target))
|
|
+ end
|
|
+ end
|
|
+ when :node
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ filter_nodeset(nodeset) do |node|
|
|
+ true
|
|
+ end
|
|
+ end
|
|
+ else
|
|
+ message = "[BUG] Unexpected node test: " +
|
|
+ "<#{operator.inspect}>: <#{path_stack.inspect}>"
|
|
+ raise message
|
|
+ end
|
|
+ new_nodesets
|
|
+ ensure
|
|
+ leave(:node_test, path_stack, new_nodesets) if @debug
|
|
+ end
|
|
|
|
- ##########################################################
|
|
- # FIXME
|
|
- # The next two methods are BAD MOJO!
|
|
- # This is my achilles heel. If anybody thinks of a better
|
|
- # way of doing this, be my guest. This really sucks, but
|
|
- # it is a wonder it works at all.
|
|
- # ########################################################
|
|
+ def filter_nodeset(nodeset)
|
|
+ new_nodeset = []
|
|
+ nodeset.each do |node|
|
|
+ next unless yield(node)
|
|
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
|
|
+ end
|
|
+ new_nodeset
|
|
+ end
|
|
|
|
- def descendant_or_self( path_stack, nodeset )
|
|
- rs = []
|
|
- d_o_s( path_stack, nodeset, rs )
|
|
- document_order(rs.flatten.compact)
|
|
- #rs.flatten.compact
|
|
+ def evaluate_predicate(expression, nodesets)
|
|
+ enter(:predicate, expression, nodesets) if @debug
|
|
+ new_nodeset_count = 0
|
|
+ new_nodesets = nodesets.collect do |nodeset|
|
|
+ new_nodeset = []
|
|
+ subcontext = { :size => nodeset.size }
|
|
+ nodeset.each_with_index do |node, index|
|
|
+ if node.is_a?(XPathNode)
|
|
+ subcontext[:node] = node.raw_node
|
|
+ subcontext[:index] = node.position
|
|
+ else
|
|
+ subcontext[:node] = node
|
|
+ subcontext[:index] = index + 1
|
|
+ end
|
|
+ result = expr(expression.dclone, [node], subcontext)
|
|
+ trace(:predicate_evaluate, expression, node, subcontext, result) if @debug
|
|
+ result = result[0] if result.kind_of? Array and result.length == 1
|
|
+ if result.kind_of? Numeric
|
|
+ if result == node.position
|
|
+ new_nodeset_count += 1
|
|
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
|
+ end
|
|
+ elsif result.instance_of? Array
|
|
+ if result.size > 0 and result.inject(false) {|k,s| s or k}
|
|
+ if result.size > 0
|
|
+ new_nodeset_count += 1
|
|
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
|
+ end
|
|
+ end
|
|
+ else
|
|
+ if result
|
|
+ new_nodeset_count += 1
|
|
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ new_nodeset
|
|
+ end
|
|
+ new_nodesets
|
|
+ ensure
|
|
+ leave(:predicate, new_nodesets) if @debug
|
|
end
|
|
|
|
- def d_o_s( p, ns, r )
|
|
- nt = nil
|
|
- ns.each_index do |i|
|
|
- n = ns[i]
|
|
- x = expr( p.dclone, [ n ] )
|
|
- nt = n.node_type
|
|
- d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
|
|
- r.concat(x) if x.size > 0
|
|
+ def trace(*args)
|
|
+ indent = " " * @nest
|
|
+ PP.pp(args, "").each_line do |line|
|
|
+ puts("#{indent}#{line}")
|
|
end
|
|
end
|
|
|
|
+ def enter(tag, *args)
|
|
+ trace(:enter, tag, *args)
|
|
+ @nest += 1
|
|
+ end
|
|
+
|
|
+ def leave(tag, *args)
|
|
+ @nest -= 1
|
|
+ trace(:leave, tag, *args)
|
|
+ end
|
|
|
|
# Reorders an array of nodes so that they are in document order
|
|
# It tries to do this efficiently.
|
|
@@ -494,7 +656,7 @@ module REXML
|
|
# in and out of function calls. If I knew what the index of the nodes was,
|
|
# I wouldn't have to do this. Maybe add a document IDX for each node?
|
|
# Problems with mutable documents. Or, rewrite everything.
|
|
- def document_order( array_of_nodes )
|
|
+ def sort(array_of_nodes, order)
|
|
new_arry = []
|
|
array_of_nodes.each { |node|
|
|
node_idx = []
|
|
@@ -505,42 +667,68 @@ module REXML
|
|
end
|
|
new_arry << [ node_idx.reverse, node ]
|
|
}
|
|
- new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
|
|
+ ordered = new_arry.sort_by do |index, node|
|
|
+ if order == :forward
|
|
+ index
|
|
+ else
|
|
+ -index
|
|
+ end
|
|
+ end
|
|
+ ordered.collect do |_index, node|
|
|
+ node
|
|
+ end
|
|
end
|
|
|
|
-
|
|
- def recurse( nodeset, &block )
|
|
- for node in nodeset
|
|
- yield node
|
|
- recurse( node, &block ) if node.node_type == :element
|
|
+ def descendant(nodeset, include_self)
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ new_nodeset = []
|
|
+ new_nodes = {}
|
|
+ descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self)
|
|
+ nodesets << new_nodeset unless new_nodeset.empty?
|
|
end
|
|
+ nodesets
|
|
end
|
|
|
|
+ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
|
|
+ if include_self
|
|
+ return if new_nodes.key?(raw_node)
|
|
+ new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
|
|
+ new_nodes[raw_node] = true
|
|
+ end
|
|
|
|
+ node_type = raw_node.node_type
|
|
+ if node_type == :element or node_type == :document
|
|
+ raw_node.children.each do |child|
|
|
+ descendant_recursive(child, new_nodeset, new_nodes, true)
|
|
+ end
|
|
+ end
|
|
+ end
|
|
|
|
# Builds a nodeset of all of the preceding nodes of the supplied node,
|
|
# in reverse document order
|
|
# preceding:: includes every element in the document that precedes this node,
|
|
# except for ancestors
|
|
- def preceding( node )
|
|
+ def preceding(node)
|
|
ancestors = []
|
|
- p = node.parent
|
|
- while p
|
|
- ancestors << p
|
|
- p = p.parent
|
|
+ parent = node.parent
|
|
+ while parent
|
|
+ ancestors << parent
|
|
+ parent = parent.parent
|
|
end
|
|
|
|
- acc = []
|
|
- p = preceding_node_of( node )
|
|
- while p
|
|
- if ancestors.include? p
|
|
- ancestors.delete(p)
|
|
+ precedings = []
|
|
+ preceding_node = preceding_node_of(node)
|
|
+ while preceding_node
|
|
+ if ancestors.include?(preceding_node)
|
|
+ ancestors.delete(preceding_node)
|
|
else
|
|
- acc << p
|
|
+ precedings << XPathNode.new(preceding_node,
|
|
+ position: precedings.size + 1)
|
|
end
|
|
- p = preceding_node_of( p )
|
|
+ preceding_node = preceding_node_of(preceding_node)
|
|
end
|
|
- acc
|
|
+ precedings
|
|
end
|
|
|
|
def preceding_node_of( node )
|
|
@@ -558,14 +746,15 @@ module REXML
|
|
psn
|
|
end
|
|
|
|
- def following( node )
|
|
- acc = []
|
|
- p = next_sibling_node( node )
|
|
- while p
|
|
- acc << p
|
|
- p = following_node_of( p )
|
|
+ def following(node)
|
|
+ followings = []
|
|
+ following_node = next_sibling_node(node)
|
|
+ while following_node
|
|
+ followings << XPathNode.new(following_node,
|
|
+ position: followings.size + 1)
|
|
+ following_node = following_node_of(following_node)
|
|
end
|
|
- acc
|
|
+ followings
|
|
end
|
|
|
|
def following_node_of( node )
|
|
@@ -587,45 +776,68 @@ module REXML
|
|
return psn
|
|
end
|
|
|
|
+ def child(nodeset)
|
|
+ nodesets = []
|
|
+ nodeset.each do |node|
|
|
+ raw_node = node.raw_node
|
|
+ node_type = raw_node.node_type
|
|
+ # trace(:child, node_type, node)
|
|
+ case node_type
|
|
+ when :element
|
|
+ nodesets << raw_node.children.collect.with_index do |child_node, i|
|
|
+ XPathNode.new(child_node, position: i + 1)
|
|
+ end
|
|
+ when :document
|
|
+ new_nodeset = []
|
|
+ raw_node.children.each do |child|
|
|
+ case child
|
|
+ when XMLDecl, Text
|
|
+ # Ignore
|
|
+ else
|
|
+ new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1)
|
|
+ end
|
|
+ end
|
|
+ nodesets << new_nodeset unless new_nodeset.empty?
|
|
+ end
|
|
+ end
|
|
+ nodesets
|
|
+ end
|
|
+
|
|
def norm b
|
|
case b
|
|
when true, false
|
|
return b
|
|
when 'true', 'false'
|
|
return Functions::boolean( b )
|
|
- when /^\d+(\.\d+)?$/
|
|
+ when /^\d+(\.\d+)?$/, Numeric
|
|
return Functions::number( b )
|
|
else
|
|
return Functions::string( b )
|
|
end
|
|
end
|
|
|
|
- def equality_relational_compare( set1, op, set2 )
|
|
+ def equality_relational_compare(set1, op, set2)
|
|
+ set1 = unnode(set1) if set1.is_a?(Array)
|
|
+ set2 = unnode(set2) if set2.is_a?(Array)
|
|
+
|
|
if set1.kind_of? Array and set2.kind_of? Array
|
|
- if set1.size == 1 and set2.size == 1
|
|
- set1 = set1[0]
|
|
- set2 = set2[0]
|
|
- elsif set1.size == 0 or set2.size == 0
|
|
- nd = set1.size==0 ? set2 : set1
|
|
- rv = nd.collect { |il| compare( il, op, nil ) }
|
|
- return rv
|
|
- else
|
|
- res = []
|
|
- SyncEnumerator.new( set1, set2 ).each { |i1, i2|
|
|
- i1 = norm( i1 )
|
|
- i2 = norm( i2 )
|
|
- res << compare( i1, op, i2 )
|
|
- }
|
|
- return res
|
|
+ # If both objects to be compared are node-sets, then the
|
|
+ # comparison will be true if and only if there is a node in the
|
|
+ # first node-set and a node in the second node-set such that the
|
|
+ # result of performing the comparison on the string-values of
|
|
+ # the two nodes is true.
|
|
+ set1.product(set2).any? do |node1, node2|
|
|
+ node_string1 = Functions.string(node1)
|
|
+ node_string2 = Functions.string(node2)
|
|
+ compare(node_string1, op, node_string2)
|
|
end
|
|
- end
|
|
- # If one is nodeset and other is number, compare number to each item
|
|
- # in nodeset s.t. number op number(string(item))
|
|
- # If one is nodeset and other is string, compare string to each item
|
|
- # in nodeset s.t. string op string(item)
|
|
- # If one is nodeset and other is boolean, compare boolean to each item
|
|
- # in nodeset s.t. boolean op boolean(item)
|
|
- if set1.kind_of? Array or set2.kind_of? Array
|
|
+ elsif set1.kind_of? Array or set2.kind_of? Array
|
|
+ # If one is nodeset and other is number, compare number to each item
|
|
+ # in nodeset s.t. number op number(string(item))
|
|
+ # If one is nodeset and other is string, compare string to each item
|
|
+ # in nodeset s.t. string op string(item)
|
|
+ # If one is nodeset and other is boolean, compare boolean to each item
|
|
+ # in nodeset s.t. boolean op boolean(item)
|
|
if set1.kind_of? Array
|
|
a = set1
|
|
b = set2
|
|
@@ -636,15 +848,23 @@ module REXML
|
|
|
|
case b
|
|
when true, false
|
|
- return a.collect {|v| compare( Functions::boolean(v), op, b ) }
|
|
+ each_unnode(a).any? do |unnoded|
|
|
+ compare(Functions.boolean(unnoded), op, b)
|
|
+ end
|
|
when Numeric
|
|
- return a.collect {|v| compare( Functions::number(v), op, b )}
|
|
- when /^\d+(\.\d+)?$/
|
|
- b = Functions::number( b )
|
|
- return a.collect {|v| compare( Functions::number(v), op, b )}
|
|
+ each_unnode(a).any? do |unnoded|
|
|
+ compare(Functions.number(unnoded), op, b)
|
|
+ end
|
|
+ when /\A\d+(\.\d+)?\z/
|
|
+ b = Functions.number(b)
|
|
+ each_unnode(a).any? do |unnoded|
|
|
+ compare(Functions.number(unnoded), op, b)
|
|
+ end
|
|
else
|
|
- b = Functions::string( b )
|
|
- return a.collect { |v| compare( Functions::string(v), op, b ) }
|
|
+ b = Functions::string(b)
|
|
+ each_unnode(a).any? do |unnoded|
|
|
+ compare(Functions::string(unnoded), op, b)
|
|
+ end
|
|
end
|
|
else
|
|
# If neither is nodeset,
|
|
@@ -654,32 +874,52 @@ module REXML
|
|
# Else, convert to string
|
|
# Else
|
|
# Convert both to numbers and compare
|
|
- s1 = set1.to_s
|
|
- s2 = set2.to_s
|
|
- if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
|
|
- set1 = Functions::boolean( set1 )
|
|
- set2 = Functions::boolean( set2 )
|
|
+ compare(set1, op, set2)
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def value_type(value)
|
|
+ case value
|
|
+ when true, false
|
|
+ :boolean
|
|
+ when Numeric
|
|
+ :number
|
|
+ when String
|
|
+ :string
|
|
+ else
|
|
+ raise "[BUG] Unexpected value type: <#{value.inspect}>"
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def normalize_compare_values(a, operator, b)
|
|
+ a_type = value_type(a)
|
|
+ b_type = value_type(b)
|
|
+ case operator
|
|
+ when :eq, :neq
|
|
+ if a_type == :boolean or b_type == :boolean
|
|
+ a = Functions.boolean(a) unless a_type == :boolean
|
|
+ b = Functions.boolean(b) unless b_type == :boolean
|
|
+ elsif a_type == :number or b_type == :number
|
|
+ a = Functions.number(a) unless a_type == :number
|
|
+ b = Functions.number(b) unless b_type == :number
|
|
else
|
|
- if op == :eq or op == :neq
|
|
- if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
|
|
- set1 = Functions::number( s1 )
|
|
- set2 = Functions::number( s2 )
|
|
- else
|
|
- set1 = Functions::string( set1 )
|
|
- set2 = Functions::string( set2 )
|
|
- end
|
|
- else
|
|
- set1 = Functions::number( set1 )
|
|
- set2 = Functions::number( set2 )
|
|
- end
|
|
+ a = Functions.string(a) unless a_type == :string
|
|
+ b = Functions.string(b) unless b_type == :string
|
|
end
|
|
- return compare( set1, op, set2 )
|
|
+ when :lt, :lteq, :gt, :gteq
|
|
+ a = Functions.number(a) unless a_type == :number
|
|
+ b = Functions.number(b) unless b_type == :number
|
|
+ else
|
|
+ message = "[BUG] Unexpected compare operator: " +
|
|
+ "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
|
|
+ raise message
|
|
end
|
|
- return false
|
|
+ [a, b]
|
|
end
|
|
|
|
- def compare a, op, b
|
|
- case op
|
|
+ def compare(a, operator, b)
|
|
+ a, b = normalize_compare_values(a, operator, b)
|
|
+ case operator
|
|
when :eq
|
|
a == b
|
|
when :neq
|
|
@@ -692,13 +932,47 @@ module REXML
|
|
a > b
|
|
when :gteq
|
|
a >= b
|
|
- when :and
|
|
- a and b
|
|
- when :or
|
|
- a or b
|
|
else
|
|
- false
|
|
+ message = "[BUG] Unexpected compare operator: " +
|
|
+ "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
|
|
+ raise message
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def each_unnode(nodeset)
|
|
+ return to_enum(__method__, nodeset) unless block_given?
|
|
+ nodeset.each do |node|
|
|
+ if node.is_a?(XPathNode)
|
|
+ unnoded = node.raw_node
|
|
+ else
|
|
+ unnoded = node
|
|
+ end
|
|
+ yield(unnoded)
|
|
+ end
|
|
+ end
|
|
+
|
|
+ def unnode(nodeset)
|
|
+ each_unnode(nodeset).collect do |unnoded|
|
|
+ unnoded = yield(unnoded) if block_given?
|
|
+ unnoded
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+
|
|
+ # @private
|
|
+ class XPathNode
|
|
+ attr_reader :raw_node, :context
|
|
+ def initialize(node, context=nil)
|
|
+ if node.is_a?(XPathNode)
|
|
+ @raw_node = node.raw_node
|
|
+ else
|
|
+ @raw_node = node
|
|
end
|
|
+ @context = context || {}
|
|
+ end
|
|
+
|
|
+ def position
|
|
+ @context[:position]
|
|
end
|
|
end
|
|
end
|
|
--
|
|
2.27.0
|
|
|