module Escape

Escape module provides several escape functions.

Public Instance Methods

_ltsv_key(str) click to toggle source
# File escape.rb, line 569
# Quotes +str+ so it can be used as the key part of an LTSV field.
#
# Returns +str+ itself (not a copy) when it contains none of: an ASCII
# control character (0x00-0x1f, 0x7f), '"', ':' or '\'.  Otherwise returns
# a new string wrapped in double quotes in which every such character is
# replaced by a backslash escape: a mnemonic (\0 \a \b \f \n \r \t \v \e)
# where one exists, and \xHH (two uppercase hex digits) for the rest.
def _ltsv_key(str)
  # Backslash (\\) and DEL (\x7f) are two separate members of the class;
  # writing "\\x7f" would instead match the letters 'x', '7' and 'f'.
  special = /[\0-\x1f":\\\x7f]/
  return str if special !~ str

  body = str.gsub(special) { |ch|
    case ch
    when "\0" then '\0'
    when "\a" then '\a'
    when "\b" then '\b'
    when "\f" then '\f'
    when "\n" then '\n'
    when "\r" then '\r'
    when "\t" then '\t'
    when "\v" then '\v'
    when "\e" then '\e'
    else
      # Every member of the class is a single byte, so "C" sees all of it.
      "\\x%02X" % ch.unpack("C")[0]
    end
  }
  '"' + body + '"'
end
_ltsv_val(str) click to toggle source
# File escape.rb, line 595
# Quotes +str+ so it can be used as the value part of an LTSV field.
#
# Returns +str+ itself (not a copy) when it contains none of: an ASCII
# control character (0x00-0x1f, 0x7f), '"' or '\'.  Unlike keys, ':' is
# allowed unescaped.  Otherwise returns a new string wrapped in double
# quotes in which every such character is replaced by a backslash escape:
# a mnemonic (\0 \a \b \f \n \r \t \v \e) where one exists, and \xHH (two
# uppercase hex digits) for the rest.
def _ltsv_val(str)
  # Backslash (\\) and DEL (\x7f) are two separate members of the class;
  # writing "\\x7f" would instead match the letters 'x', '7' and 'f'.
  special = /[\0-\x1f"\\\x7f]/
  return str if special !~ str

  body = str.gsub(special) { |ch|
    case ch
    when "\0" then '\0'
    when "\a" then '\a'
    when "\b" then '\b'
    when "\f" then '\f'
    when "\n" then '\n'
    when "\r" then '\r'
    when "\t" then '\t'
    when "\v" then '\v'
    when "\e" then '\e'
    else
      # Every member of the class is a single byte, so "C" sees all of it.
      "\\x%02X" % ch.unpack("C")[0]
    end
  }
  '"' + body + '"'
end
html_attr_value(str) click to toggle source

#html_attr_value encodes a string as a double-quoted HTML attribute using character references. It returns an instance of HTMLAttrValue.

Escape.html_attr_value("abc") #=> #<Escape::HTMLAttrValue: "abc">
Escape.html_attr_value("a&b") #=> #<Escape::HTMLAttrValue: "a&amp;b">
Escape.html_attr_value("ab&<>\"c") #=> #<Escape::HTMLAttrValue: "ab&amp;&lt;&gt;&quot;c">
Escape.html_attr_value("a'c") #=> #<Escape::HTMLAttrValue: "a'c">

It escapes 4 characters:

  • '&' to '&amp;'

  • '<' to '&lt;'

  • '>' to '&gt;'

  • '"' to '&quot;'

# File escape.rb, line 371
# Encodes +str+ as a double-quoted HTML attribute value.
#
# Each '&', '<', '>' and '"' is replaced by whatever HTML_ATTR_ESCAPE_HASH
# maps it to, the result is wrapped in double quotes, and the quoted string
# is handed to HTMLAttrValue.new_no_dup.
def html_attr_value(str)
  escaped = str.gsub(/[&<>"]/) { |special| HTML_ATTR_ESCAPE_HASH[special] }
  quoted = '"' + escaped + '"'
  HTMLAttrValue.new_no_dup(quoted)
end
html_form(pairs, sep='&') click to toggle source

#html_form composes HTML form key-value pairs as a x-www-form-urlencoded encoded string. It returns an instance of PercentEncoded.

#html_form takes an array of pairs of strings or a hash from string to string.

Escape.html_form([["a","b"], ["c","d"]]) #=> #<Escape::PercentEncoded: a=b&c=d>
Escape.html_form({"a"=>"b", "c"=>"d"}) #=> #<Escape::PercentEncoded: a=b&c=d>

In the array form, it is possible to use the same key more than once. (This is required for an HTML form which contains checkboxes or a select element with the multiple attribute.)

Escape.html_form([["k","1"], ["k","2"]]) #=> #<Escape::PercentEncoded: k=1&k=2>

If the strings contain characters which must be escaped in x-www-form-urlencoded, they are escaped using %-encoding.

Escape.html_form([["k=","&;="]]) #=> #<Escape::PercentEncoded: k%3D=%26%3B%3D>

The separator can be specified by the optional second argument.

Escape.html_form([["a","b"], ["c","d"]], ";") #=> #<Escape::PercentEncoded: a=b;c=d>

See HTML 4.01 for details.

# File escape.rb, line 278
# Composes key/value pairs into an x-www-form-urlencoded string.
#
# +pairs+ is iterated with +each+, yielding a key and a value per entry (an
# array of two-element arrays or a hash both work).  Entries are joined
# with +sep+, and key and value are joined with '='.  Within a key or a
# value, a space becomes '+', bytes in the safe set below are copied
# verbatim, and every other byte becomes %XX (uppercase hex).  The result
# is handed to PercentEncoded.new_no_dup.
def html_form(pairs, sep='&')
  # Derivation of the safe set (RFC 3986 terms):
  #   unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  #   sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
  #   query-char = pchar / "/" / "?" = unreserved / pct-encoded / sub-delims / ":" / "@" / "/" / "?"
  #   query-char - pct-encoded = unreserved / sub-delims / ":" / "@" / "/" / "?"
  #   x-www-form-urlencoded-delimiters = "&" / "+" / ";" / "="
  #   query-chars - pct-encoded - x-www-form-urlencoded-delimiters =
  #     unreserved / "!" / "$" / "'" / "(" / ")" / "*" / "," / ":" / "@" / "/" / "?"
  must_escape = %r{[^0-9A-Za-z\-\._~:/?@!\$'()*,]}n
  out = ''

  # Appends the encoded form of one key or value to +out+, byte by byte.
  encode_into = lambda do |text|
    text.each_byte do |byte|
      ch = byte.chr
      if ch == ' '
        out << "+"
      elsif must_escape =~ ch
        out << "%" << ch.unpack("H2")[0].upcase
      else
        out << ch
      end
    end
  end

  pairs.each do |k, v|
    # Every emitted pair contains at least '=', so +out+ is empty only
    # before the first pair.
    out << sep unless out.empty?
    encode_into.call(k)
    out << '='
    encode_into.call(v)
  end
  PercentEncoded.new_no_dup(out)
end
html_text(str) click to toggle source

#html_text escapes a string appropriate for HTML text using character references. It returns an instance of HTMLEscaped.

It escapes 3 characters:

  • '&' to '&amp;'

  • '<' to '&lt;'

  • '>' to '&gt;'

Escape.html_text("abc") #=> #<Escape::HTMLEscaped: abc>
Escape.html_text("a & b < c > d") #=> #<Escape::HTMLEscaped: a &amp; b &lt; c &gt; d>

This function is not appropriate for escaping HTML element attribute because quotes are not escaped.

# File escape.rb, line 340
# Escapes +str+ for use as HTML text.
#
# Each '&', '<' and '>' is replaced by whatever HTML_TEXT_ESCAPE_HASH maps
# it to; quotes are left alone.  The escaped string is handed to
# HTMLEscaped.new_no_dup.
def html_text(str)
  escaped = str.gsub(/[&<>]/) { |special| HTML_TEXT_ESCAPE_HASH[special] }
  HTMLEscaped.new_no_dup(escaped)
end
http_parameter(attribute, value) click to toggle source

#http_parameter encodes attribute and value as HTTP parameter in RFC 2616. It returns an instance of MIMEParameter.

ArgumentError is raised if attribute is not HTTP token.

ArgumentError is raised if value is not representable in quoted-string.

Escape.http_parameter("n", "v") #=> #<Escape::MIMEParameter: n=v>
Escape.http_parameter("charset", "us-ascii") #=> #<Escape::MIMEParameter: charset=us-ascii>
Escape.http_parameter("q", "0.2") #=> #<Escape::MIMEParameter: q=0.2>
# File escape.rb, line 490
# Encodes +attribute+ and +value+ as a single "attribute=value" HTTP
# parameter wrapped in a MIMEParameter.
#
# Raises ArgumentError when http_token? rejects +attribute+.  The value is
# encoded by http_parameter_value, which may raise as well.
def http_parameter(attribute, value)
  raise ArgumentError, "not HTTP token: #{attribute.inspect}" unless http_token?(attribute)

  encoded_value = http_parameter_value(value)
  MIMEParameter.new("#{attribute}=#{encoded_value}")
end
http_parameter_value(str) click to toggle source

#http_parameter_value escapes a string as HTTP parameter value in RFC 2616. It returns an instance of MIMEParameter.

HTTP parameter value is token or quoted-string. token is used if possible.

# File escape.rb, line 472
# Encodes +str+ as an HTTP parameter value.
#
# When http_token? accepts +str+ it is wrapped as-is in a MIMEParameter;
# otherwise the result of http_quoted_string(str) is returned.
def http_parameter_value(str)
  return MIMEParameter.new(str) if http_token?(str)

  http_quoted_string(str)
end
http_params_with_pre(pre, *args) click to toggle source

#http_params_with_pre encodes parameters and joins with given prefix.

Escape.http_params_with_pre("; ", "foo", "bar")
#=> #<Escape::MIMEParameter: ; foo=bar>

Escape.http_params_with_pre("; ", "foo", "bar", "hoge", "fuga")
#=> #<Escape::MIMEParameter: ; foo=bar; hoge=fuga>

If args are empty, empty MIMEParameter is returned.

Escape.http_params_with_pre("; ") #=> #<Escape::MIMEParameter: >
# File escape.rb, line 541
# Encodes each attribute/value pair with http_parameter, prefixes every
# encoded parameter with +pre+, and concatenates them with no separator.
#
# +args+ is turned into attribute/value pairs by _parse_http_params_args.
# The concatenated string is handed to MIMEParameter.new_no_dup.
def http_params_with_pre(pre, *args)
  prefixed = _parse_http_params_args(args).map do |attribute, value|
    encoded = http_parameter(attribute, value)
    pre + encoded.to_s
  end
  MIMEParameter.new_no_dup(prefixed.join(''))
end
http_params_with_sep(sep, *args) click to toggle source

#http_params_with_sep encodes parameters and joins with sep.

Escape.http_params_with_sep("; ", "foo", "bar")
#=> #<Escape::MIMEParameter: foo=bar>

Escape.http_params_with_sep("; ", "foo", "bar", "hoge", "fuga")
#=> #<Escape::MIMEParameter: foo=bar; hoge=fuga>

If args are empty, empty MIMEParameter is returned.

Escape.http_params_with_sep("; ") #=> #<Escape::MIMEParameter: >
# File escape.rb, line 523
# Encodes each attribute/value pair with http_parameter and joins the
# encoded parameters with +sep+ between them.
#
# +args+ is turned into attribute/value pairs by _parse_http_params_args.
# The joined string is handed to MIMEParameter.new_no_dup.
def http_params_with_sep(sep, *args)
  encoded = _parse_http_params_args(args).map do |attribute, value|
    http_parameter(attribute, value)
  end
  MIMEParameter.new_no_dup(encoded.join(sep))
end
http_quoted_string(str) click to toggle source

#http_quoted_string escapes a string as quoted-string defined in RFC 2616. It returns an instance of MIMEParameter.

The given string may contain carriage returns ("\r") and line feeds ("\n"). However they must be part of folding white space: /\r\n[ \t]/ or /\n[ \t]/. #http_quoted_string assumes that newlines are represented as "\n" or "\r\n".

# File escape.rb, line 459
# Escapes +str+ as a quoted-string: every '"' and '\' is preceded by a
# backslash and the result is wrapped in double quotes, then handed to
# MIMEParameter.new_no_dup.
#
# Raises ArgumentError when +str+ contains a CR or LF that is not part of
# folding white space, i.e. anything other than an optional CR, an LF, and
# then a space or tab.
def http_quoted_string(str)
  if /\A(?:[\0-\x09\x0b\x0c\x0e-\xff]|\r?\n[ \t])*\z/n !~ str
    raise ArgumentError, "CR or LF not part of folding white space exists: #{str.inspect}"
  end
  # A block is used for the replacement so no gsub back-reference syntax
  # (and its extra layer of backslash escaping) is involved.
  escaped = str.gsub(/["\\]/) { |ch| "\\" + ch }
  MIMEParameter.new_no_dup('"' + escaped + '"')
end
http_token?(str) click to toggle source

Predicate for HTTP token.

token is a sequence of any CHAR except CTLs or separators

# File escape.rb, line 448
# Returns true when +str+ is non-empty and consists only of the characters
# listed in the pattern below ('!', '#' through "'", '*', '+', '-', '.',
# digits, uppercase letters, '^' through 'z', '|' and '~'); false otherwise.
def http_token?(str)
  token_pattern = /\A[!\#-'*+\-.0-9A-Z^-z|~]+\z/
  !(token_pattern =~ str).nil?
end
ltsv_line(assoc) click to toggle source
# File escape.rb, line 557
# Builds one LTSV line from +assoc+, which is iterated with +each+ yielding
# a key and a value per entry.
#
# Each entry is emitted as _ltsv_key(key), ':', _ltsv_val(value) and a tab;
# the final tab is then replaced by a newline.  When +assoc+ yields nothing
# the line stays empty (no newline is added).  The string is handed to
# LTSVEscaped.new_no_dup.
def ltsv_line(assoc)
  line = ''
  assoc.each do |k, v|
    line << _ltsv_key(k) << ':' << _ltsv_val(v) << "\t"
  end
  # Turn the tab left after the last field into the line terminator.
  line.sub!(/\t\z/, "\n")
  LTSVEscaped.new_no_dup(line)
end
mime_parameter(attribute, value) click to toggle source

#mime_parameter encodes attribute and value as MIME parameter in RFC 2045. It returns an instance of MIMEParameter.

ArgumentError is raised if attribute is not MIME token.

ArgumentError is raised if value contains CR, LF or NUL.

Escape.mime_parameter("n", "v") #=> #<Escape::MIMEParameter: n=v>
Escape.mime_parameter("charset", "us-ascii") #=> #<Escape::MIMEParameter: charset=us-ascii>
Escape.mime_parameter("boundary", "gc0pJq0M:08jU534c0p") #=> #<Escape::MIMEParameter: boundary="gc0pJq0M:08jU534c0p">
Escape.mime_parameter("boundary", "simple boundary") #=> #<Escape::MIMEParameter: boundary="simple boundary">
# File escape.rb, line 438
# Encodes +attribute+ and +value+ as a single "attribute=value" MIME
# parameter wrapped in a MIMEParameter.
#
# Raises ArgumentError when mime_token? rejects +attribute+.  The value is
# encoded by mime_parameter_value, which may raise as well.
def mime_parameter(attribute, value)
  raise ArgumentError, "not MIME token: #{attribute.inspect}" unless mime_token?(attribute)

  encoded_value = mime_parameter_value(value)
  MIMEParameter.new("#{attribute}=#{encoded_value}")
end
mime_parameter_value(str) click to toggle source

#mime_parameter_value escapes a string as MIME parameter value in RFC 2045. It returns an instance of MIMEParameter.

MIME parameter value is token or quoted-string. token is used if possible.

# File escape.rb, line 419
# Encodes +str+ as a MIME parameter value.
#
# When mime_token? accepts +str+ it is wrapped as-is in a MIMEParameter;
# otherwise the result of rfc2822_quoted_string(str) is returned.
def mime_parameter_value(str)
  return MIMEParameter.new(str) if mime_token?(str)

  rfc2822_quoted_string(str)
end
mime_token?(str) click to toggle source

predicate for MIME token.

token is a sequence of any (US-ASCII) CHAR except SPACE, CTLs, or tspecials.

# File escape.rb, line 385
# Returns true when +str+ is non-empty and consists only of the characters
# listed in the pattern below ('!', '#' through "'", '*', '+', '-', '.',
# digits, uppercase letters, and '^' through '~'); false otherwise.
def mime_token?(str)
  token_pattern = /\A[!\#-'*+\-.0-9A-Z^-~]+\z/
  !(token_pattern =~ str).nil?
end
percent_encoding(str) click to toggle source

#percent_encoding escapes URI non-unreserved characters using percent-encoding. It returns an instance of PercentEncoded.

The unreserved characters are alphabet, digit, hyphen, dot, underscore and tilde.

RFC 3986

Escape.percent_encoding("foo") #=> #<Escape::PercentEncoded: foo>

Escape.percent_encoding(' !"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') #=> #<Escape::PercentEncoded: %20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F%3A%3B%3C%3D%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~>

# File escape.rb, line 176
# Percent-encodes every character of +str+ that is not a URI unreserved
# character (letters, digits, '-', '.', '_', '~').  The encoded string is
# handed to PercentEncoded.new_no_dup.
#
# Each byte of an escaped character becomes %XX with uppercase hex digits,
# so a multi-byte character such as U+00E9 yields "%C3%A9".
def percent_encoding(str)
  # The pattern is ASCII-only, so it needs no /n flag.  A match may be a
  # whole multi-byte character, hence every byte of it is encoded rather
  # than just the first one.
  s = str.gsub(/[^A-Za-z0-9\-._~]/) { |ch|
    ch.unpack("C*").map { |byte| "%%%02X" % byte }.join
  }
  PercentEncoded.new_no_dup(s)
end
rfc2822_quoted_string(str) click to toggle source

#rfc2822_quoted_string escapes a string as quoted-string defined in RFC 2822. It returns an instance of MIMEParameter.

The obsolete syntax in quoted-string is not permitted. For example, NUL causes ArgumentError.

The given string may contain carriage returns ("\r") and line feeds ("\n"). However they must be part of folding white space: /\r\n[ \t]/ or /\n[ \t]/. #rfc2822_quoted_string assumes that newlines are represented as "\n" or "\r\n".

#rfc2822_quoted_string does not permit consecutive sequences of folding white space such as "\n \n ", according to RFC 2822 syntax.

# File escape.rb, line 406
# Escapes +str+ as a quoted-string: every '"' and '\' is preceded by a
# backslash and the result is wrapped in double quotes, then handed to
# MIMEParameter.new_no_dup.
#
# Raises ArgumentError when +str+ does not match the pattern below: a
# sequence of characters from 0x01-0x7f other than LF and CR, each
# optionally preceded by RFC2822_FWS, with an optional trailing
# RFC2822_FWS.  RFC2822_FWS is a constant defined outside this method.
def rfc2822_quoted_string(str)
  if /\A(?:#{RFC2822_FWS}?[\x01-\x09\x0b\x0c\x0e-\x7f])*#{RFC2822_FWS}?\z/o !~ str
    raise ArgumentError, "not representable in quoted-string of RFC 2822: #{str.inspect}"
  end
  # A block is used for the replacement so no gsub back-reference syntax
  # (and its extra layer of backslash escaping) is involved.
  escaped = str.gsub(/["\\]/) { |ch| "\\" + ch }
  MIMEParameter.new_no_dup('"' + escaped + '"')
end
shell_command(command) click to toggle source

#shell_command composes a sequence of words to a single shell command line. All shell meta characters are quoted and the words are concatenated with interleaving space. It returns an instance of ShellEscaped.

Escape.shell_command(["ls", "/"]) #=> #<Escape::ShellEscaped: ls />
Escape.shell_command(["echo", "*"]) #=> #<Escape::ShellEscaped: echo '*'>

Note that system(*command) and system(Escape.shell_command(command).to_s) are roughly the same. There are two exceptions, as follows.

  • The first is that the latter may invoke /bin/sh.

  • The second is the interpretation of an array with only one element: the element is parsed by the shell with the former, but it is recognized as a single word with the latter. For example, system(*["echo foo"]) invokes the echo command with an argument "foo". But system(Escape.shell_command(["echo foo"]).to_s) invokes an "echo foo" command without arguments (and it probably fails).

# File escape.rb, line 94
# Composes the words of +command+ into one shell command line.
#
# Every word is quoted with shell_single_word, the quoted words are joined
# with a single space, and the line is handed to ShellEscaped.new_no_dup.
def shell_command(command)
  quoted_words = command.map { |word| shell_single_word(word) }
  ShellEscaped.new_no_dup(quoted_words.join(' '))
end
shell_single_word(str) click to toggle source

#shell_single_word quotes shell meta characters. It returns an instance of ShellEscaped.

The result string is always a single shell word, even if the argument is "". #shell_single_word("") returns #<Escape::ShellEscaped: ''>.

Escape.shell_single_word("") #=> #<Escape::ShellEscaped: ''>
Escape.shell_single_word("foo") #=> #<Escape::ShellEscaped: foo>
Escape.shell_single_word("*") #=> #<Escape::ShellEscaped: '*'>
# File escape.rb, line 109
# Quotes +str+ so that a shell reads it back as exactly one word.
#
# * An empty string becomes '' (two single quotes).
# * A string made only of letters, digits and "+,./:=@_-" is passed to
#   ShellEscaped.new unchanged.
# * Anything else is split into runs of single quotes and runs of other
#   characters: each single quote is written as \' and each other run is
#   wrapped in single quotes.  The pieces are concatenated and handed to
#   ShellEscaped.new_no_dup.
def shell_single_word(str)
  if str.empty?
    ShellEscaped.new_no_dup("''")
  elsif %r{\A[0-9A-Za-z+,./:=@_-]+\z} =~ str
    ShellEscaped.new(str)
  else
    result = ''
    str.scan(/('+)|[^']+/) {
      if $1
        # A run of quotes: one backslash-escaped quote per original quote.
        result << "\\'" * $1.length
      else
        # No quote inside this run, so plain single-quoting is safe.
        result << "'#{$&}'"
      end
    }
    ShellEscaped.new_no_dup(result)
  end
end
uri_path(arg) click to toggle source

#uri_path escapes URI path using percent-encoding.

The given path should be one of the following.

  • a sequence of (non-escaped) segments separated by "/". (The segments cannot contain "/".)

  • an array containing (non-escaped) segments. (The segments may contain "/".)

It returns an instance of PercentEncoded.

Escape.uri_path("a/b/c") #=> #<Escape::PercentEncoded: a/b/c>
Escape.uri_path("a?b/c?d/e?f") #=> #<Escape::PercentEncoded: a%3Fb/c%3Fd/e%3Ff>
Escape.uri_path(%w[/d f]) #=> #<Escape::PercentEncoded: %2Fd/f>

The path is the part after authority before query in URI, as follows.

scheme://authority/path?query#fragment

See RFC 3986 for details of URI.

Note that this function is not appropriate to convert OS path to URI.

# File escape.rb, line 222
# Percent-encodes a URI path and hands the result to
# PercentEncoded.new_no_dup.
#
# When +arg+ responds to +to_ary+, each element is encoded with uri_segment
# and the results are joined with "/".  Otherwise +arg+ is treated as a
# string: every maximal run of non-"/" characters is replaced by its
# uri_segment encoding and the "/" separators are kept as they are.
def uri_path(arg)
  encoded =
    if arg.respond_to?(:to_ary)
      arg.map { |segment| uri_segment(segment) }.join('/')
    else
      arg.gsub(%r{[^/]+}n) { |segment| uri_segment(segment) }
    end
  PercentEncoded.new_no_dup(encoded)
end
uri_segment(str) click to toggle source

#uri_segment escapes URI segment using percent-encoding. It returns an instance of PercentEncoded.

Escape.uri_segment("a/b") #=> #<Escape::PercentEncoded: a%2Fb>

The segment is a "/"-separated element after authority before query in URI, as follows.

scheme://authority/segment1/segment2/.../segmentN?query#fragment

See RFC 3986 for details of URI.

# File escape.rb, line 193
# Percent-encodes every character of +str+ that may not appear literally
# in a URI path segment.  The encoded string is handed to
# PercentEncoded.new_no_dup.
#
# Each byte of an escaped character becomes %XX with uppercase hex digits,
# so a multi-byte character such as U+00E9 yields "%C3%A9".
def uri_segment(str)
  # Characters left as they are (RFC 3986 terms):
  #   pchar - pct-encoded = unreserved / sub-delims / ":" / "@"
  #   unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  #   sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
  #
  # The pattern is ASCII-only, so it needs no /n flag.  A match may be a
  # whole multi-byte character, hence every byte of it is encoded rather
  # than just the first one.
  s = str.gsub(%r{[^A-Za-z0-9\-._~!$&'()*+,;=:@]}) { |ch|
    ch.unpack("C*").map { |byte| "%%%02X" % byte }.join
  }
  PercentEncoded.new_no_dup(s)
end