module HTree::Doc::Trav

Public Instance Methods

author() click to toggle source

author searches for the author and returns it as a text. It returns nil if not found.

author searches the following information.

  • <meta name="author" content="author-name"> in HTML

  • <link rev="made" title="author-name"> in HTML

  • <dc:creator>author-name</dc:creator> in RSS

  • <dc:publisher>author-name</dc:publisher> in RSS

  • <author><name>author-name</name></author> in Atom

# File htree/traverse.rb, line 304
# Looks up the document's author and returns the first non-empty, stripped
# value found, or nil when no source yields one.
#
# Sources are tried in this order:
# 1. the content attribute of a meta element whose name is "author"
# 2. the title attribute of a link element whose rev is "made"
# 3. dc:creator, then dc:publisher, inside an RSS 1.0 channel element
# 4. the name inside an author element, when a top-level child is an Atom feed
def author
  traverse_element('meta', '{http://www.w3.org/1999/xhtml}meta') do |meta|
    begin
      # The attribute value is compared case-insensitively.
      if meta.fetch_attr('name').downcase == 'author'
        found = meta.fetch_attribute('content').strip
        return found unless found.empty?
      end
    rescue IndexError
      # Raised lookups (presumably a missing attribute) just skip this element.
    end
  end

  traverse_element('link', '{http://www.w3.org/1999/xhtml}link') do |link|
    begin
      if link.fetch_attr('rev').downcase == 'made'
        found = link.fetch_attribute('title').strip
        return found unless found.empty?
      end
    rescue IndexError
      # Same best-effort handling as for meta elements.
    end
  end

  channel = find_element('{http://purl.org/rss/1.0/}channel')
  if channel
    # Every creator is examined before any publisher.
    ['{http://purl.org/dc/elements/1.1/}creator',
     '{http://purl.org/dc/elements/1.1/}publisher'].each do |dc_name|
      channel.traverse_element(dc_name) do |dc|
        begin
          found = dc.extract_text.strip
          return found unless found.empty?
        rescue IndexError
        end
      end
    end
  end

  ['http://www.w3.org/2005/Atom', 'http://purl.org/atom/ns#'].each do |xmlns|
    each_child do |top|
      next unless top.elem? && top.name == "{#{xmlns}}feed"

      # The author element is looked up from this node, not from +top+.
      feed_author = find_element("{#{xmlns}}author")
      next unless feed_author

      feed_author.traverse_element("{#{xmlns}}name") do |name_elem|
        begin
          found = name_elem.extract_text.strip
          return found unless found.empty?
        rescue IndexError
        end
      end
    end
  end

  nil
end
has_xmldecl?() click to toggle source

has_xmldecl? returns true if there is an XML declaration at the top level.

# File htree/traverse.rb, line 377
# Reports whether any entry of +children+ answers true to +xmldecl?+.
# Returns false when there are no such children.
def has_xmldecl?
  children.any? { |child| child.xmldecl? }
end
root() click to toggle source

root searches for the root element. If there is no element at the top level, it raises HTree::Error. If there are two or more elements at the top level, it also raises HTree::Error.

# File htree/traverse.rb, line 368
# Returns the single entry of +children+ for which +elem?+ is true.
# Raises HTree::Error when there is no such entry or more than one.
def root
  top_elements = children.select { |child| child.elem? }
  case top_elements.length
  when 0 then raise HTree::Error, "no element"
  when 1 then top_elements[0]
  else raise HTree::Error, "multiple top elements"
  end
end
title() click to toggle source

title searches for the title and returns it as a text. It returns nil if not found.

title searches the following information.

  • <title>…</title> in HTML

  • <title>…</title> in RSS

  • <title>…</title> in Atom

# File htree/traverse.rb, line 284
# Finds the first element matching one of the known title names (plain,
# XHTML, RSS 1.0, RSS 0.9, Atom 1.0, Atom 0.3) and returns its extracted
# text. When find_element yields nothing, that falsy result is returned.
def title
  title_names = [
    'title',
    '{http://www.w3.org/1999/xhtml}title',
    '{http://purl.org/rss/1.0/}title',
    '{http://my.netscape.com/rdf/simple/0.9/}title',
    '{http://www.w3.org/2005/Atom}title',
    '{http://purl.org/atom/ns#}title'
  ]
  title_element = find_element(*title_names)
  return title_element unless title_element

  title_element.extract_text
end
traverse_all_element(&block) click to toggle source
# File htree/traverse.rb, line 181
# Calls traverse_all_element on every entry of +children+, forwarding the
# given block to each call.
def traverse_all_element(&block)
  children.each do |child|
    child.traverse_all_element(&block)
  end
end
traverse_some_element(name_set, &block) click to toggle source
# File htree/traverse.rb, line 199
# Calls traverse_some_element on every entry of +children+, passing along
# +name_set+ and the given block unchanged.
def traverse_some_element(name_set, &block)
  children.each do |child|
    child.traverse_some_element(name_set, &block)
  end
end