class RSpec::Support::EncodedString

@private

Constants

REPLACE

Ruby’s default replacement string is:

U+FFFD ("\xEF\xBF\xBD"), for Unicode encoding forms, else
?      ("\x3F")

US_ASCII

UTF_8

Reduce allocations by storing constants.

Attributes

source_encoding [R]

Public Class Methods

new (string, encoding=nil)

# File rspec-support/lib/rspec/support/encoded_string.rb, line 16
# Wraps +string+, remembering the encoding it arrived in and storing a copy
# converted to the requested target encoding.
#
# @param string [String] the text to wrap
# @param encoding [Encoding, String, nil] the target encoding handed to
#   matching_encoding for every string this object processes
def initialize(string, encoding=nil)
  # Record where the text came from before any conversion happens.
  @source_encoding = detect_source_encoding(string)
  # The target encoding must be set before matching_encoding runs,
  # because that method reads @encoding.
  @encoding = encoding
  @string = matching_encoding(string)
end

pick_encoding (source_a, source_b)

# File rspec-support/lib/rspec/support/encoded_string.rb, line 143
# Chooses an encoding usable for both arguments.
#
# @param source_a [Object] first string (or encoding) to compare
# @param source_b [Object] second string (or encoding) to compare
# @return [Encoding] the encoding reported by Encoding.compatible? when the
#   two arguments are compatible, otherwise Encoding.default_external
def self.pick_encoding(source_a, source_b)
  shared = Encoding.compatible?(source_a, source_b)
  return shared if shared

  Encoding.default_external
end

Public Instance Methods

<< (string)

# File rspec-support/lib/rspec/support/encoded_string.rb, line 28
# Appends +string+ to the wrapped string after passing it through
# matching_encoding.
#
# @param string [String] the text to append
# @return [String] the wrapped string (mutated in place), not this wrapper
def <<(string)
  converted = matching_encoding(string)
  @string.concat(converted)
end

detect_source_encoding (string)

# File rspec-support/lib/rspec/support/encoded_string.rb, line 139
# Reports the encoding that +string+ is currently tagged with.
#
# @param string [#encoding] the object to inspect
# @return the result of calling +encoding+ on +string+
def detect_source_encoding(string)
  string.encoding
end

matching_encoding (string)

Encoding Exceptions:

Raised by Encoding and String methods:

Encoding::UndefinedConversionError:
  when a transcoding operation fails
  if the String contains characters invalid for the target encoding
  e.g. "\x80".encode('UTF-8','ASCII-8BIT')
  vs "\x80".encode('UTF-8','ASCII-8BIT', undef: :replace, replace: '<undef>')
  # => '<undef>'
Encoding::CompatibilityError
  when Encoding.compatible?(str1, str2) is nil
  e.g. utf_16le_emoji_string.split("\n")
  e.g. valid_unicode_string.encode(utf8_encoding) << ascii_string
Encoding::InvalidByteSequenceError:
  when the string being transcoded contains a byte invalid for
  either the source or target encoding
  e.g. "\x80".encode('UTF-8','US-ASCII')
  vs "\x80".encode('UTF-8','US-ASCII', invalid: :replace, replace: '<byte>')
  # => '<byte>'
ArgumentError
  when operating on a string with invalid bytes
  e.g. "\x80".split("\n")
TypeError
  when a symbol is passed as an encoding
  Encoding.find(:"UTF-8")
  when calling force_encoding on an object
  that doesn't respond to #to_str

Raised by transcoding methods:

Encoding::ConverterNotFoundError:
  when a named encoding does not correspond with a known converter
  e.g. 'abc'.force_encoding('UTF-8').encode('foo')
  or a converter path cannot be found
  e.g. "\x80".force_encoding('ASCII-8BIT').encode('Emacs-Mule')

Raised by byte <-> char conversions

RangeError: out of char range
  e.g. the UTF-16LE emoji: 128169.chr

# File rspec-support/lib/rspec/support/encoded_string.rb, line 93
# Returns a version of +string+ converted to the target encoding (@encoding).
#
# The string is first passed through remove_invalid_bytes and then encoded.
# Two failure modes are handled by falling back to a lossy conversion:
#
# * Encoding::UndefinedConversionError / Encoding::InvalidByteSequenceError:
#   the encode is retried with invalid and undefined characters replaced
#   by REPLACE.
# * Encoding::ConverterNotFoundError: a copy of the string is relabelled
#   with force_encoding(@encoding) and then encoded with invalid bytes
#   replaced by REPLACE.
#
# Any other exception propagates to the caller.
#
# @param string [String] the text to convert
# @return [String] the converted text
def matching_encoding(string)
  # Reassigning +string+ means the rescue clauses below operate on the
  # cleaned-up value whenever remove_invalid_bytes succeeded.
  string = remove_invalid_bytes(string)
  string.encode(@encoding)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
  # Originally defined as a constant to avoid unneeded allocations, this hash must
  # be defined inline (without {}) to avoid warnings on Ruby 2.7
  #
  # In MRI 2.1 'invalid: :replace' changed to also replace an invalid byte sequence
  # see https://github.com/ruby/ruby/blob/v2_1_0/NEWS#L176
  # https://www.ruby-forum.com/topic/6861247
  # https://twitter.com/nalsh/status/553413844685438976
  #
  # For example, given:
  #   "\x80".force_encoding("Emacs-Mule").encode(:invalid => :replace).bytes.to_a
  #
  # On MRI 2.1 or above: 63  # '?'
  # else               : 128 # "\x80"
  #
  string.encode(@encoding, :invalid => :replace, :undef => :replace, :replace => REPLACE)
rescue Encoding::ConverterNotFoundError
  # Originally defined as a constant to avoid unneeded allocations, this hash must
  # be defined inline (without {}) to avoid warnings on Ruby 2.7
  #
  # No converter exists for this source/target pair, so work on a copy
  # (dup keeps the caller's string untouched) that is relabelled rather
  # than transcoded.
  string.dup.force_encoding(@encoding).encode(:invalid => :replace, :replace => REPLACE)
end

remove_invalid_bytes (string)

See:

https://github.com/ruby/ruby/blob/eeb05e8c11/doc/NEWS-2.1.0#L120-L123
https://github.com/ruby/ruby/blob/v2_1_0/string.c#L8242
https://github.com/hsbt/string-scrub
https://github.com/rubinius/rubinius/blob/v2.5.2/kernel/common/string.rb#L1913-L1972

# File rspec-support/lib/rspec/support/encoded_string.rb, line 124
# Returns +string+ with invalid byte sequences substituted by REPLACE,
# delegating to String#scrub.
#
# @param string [String] the text to clean
# @return [String] the result of +string.scrub(REPLACE)+
def remove_invalid_bytes(string)
  string.scrub(REPLACE)
end

split (regex_or_string)

# File rspec-support/lib/rspec/support/encoded_string.rb, line 33
# Splits the wrapped string on +regex_or_string+.
#
# A String separator is first converted with matching_encoding so that it is
# compatible with the wrapped string. A Regexp separator is passed through
# unchanged: it cannot be scrubbed or transcoded, and sending it through
# matching_encoding would raise NoMethodError.
#
# @param regex_or_string [Regexp, String] the separator to split on
# @return [Array<String>] the pieces of the wrapped string
def split(regex_or_string)
  separator = regex_or_string
  separator = matching_encoding(regex_or_string) unless regex_or_string.is_a?(Regexp)
  @string.split(separator)
rescue ArgumentError
  # JRuby raises an ArgumentError when splitting a source string that
  # contains invalid bytes. Retry on a cleaned copy, reusing the converted
  # separator (when conversion succeeded) so both sides share an encoding.
  remove_invalid_bytes(@string).split(separator)
end

to_s ()

# File rspec-support/lib/rspec/support/encoded_string.rb, line 46
# Returns the wrapped string as stored in @string (the same object, not a copy).
#
# @return [String] the wrapped string
def to_s
  @string
end

Also aliased as: to_str

to_str ()

Alias for: to_s