class Object

Constants

LOCAL_PACKAGE_RESPONSE
NOKOGIRI_HELP_MESSAGE
OTHER_LIBRARY_VERSIONS

Keep track of what versions of what libraries we build against

PACKAGE_ROOT_DIR

helpful constants

REQUIRED_LIBXML_VERSION
REQUIRED_MINI_PORTILE_VERSION
REQUIRED_PKG_CONFIG_VERSION

Public Instance Methods

Parse a document contained in args. Nokogiri will try to guess what type of document you are attempting to parse. For more information, see Nokogiri.parse

To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.

# File lib/nokogiri.rb, line 108
# Convenience entry point: with a block, builds a document through
# Nokogiri::HTML4::Builder and returns its root; without one, hands the
# arguments to Nokogiri.parse.
#
# @param args [Array] forwarded unchanged to Nokogiri.parse when no block is given
# @yield builder block passed to Nokogiri::HTML4::Builder.new
# @return the built document's root, or whatever Nokogiri.parse returns
def Nokogiri(*args, &block)
  return Nokogiri.parse(*args) unless block

  Nokogiri::HTML4::Builder.new(&block).doc.root
end
# File ext/nokogiri/extconf.rb, line 319
# Aborts the build with a banner that names the missing library, shows the
# two nearest call sites, and points the user at mkmf.log.
#
# @param lib [String] name shown in the message
# @return never returns normally; Kernel#abort terminates the process
def abort_could_not_find_library(lib)
  call_sites = caller(1..2).join("\n")
  banner = [
    "-----",
    call_sites,
    "#{lib} is missing. Please locate mkmf.log to investigate how it is failing.",
    "-----",
  ].join("\n")
  abort(banner)
end
# File ext/nokogiri/extconf.rb, line 208
# @return [Boolean] true when RbConfig's "target_os" contains "aix"
def aix?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("aix")
end
# File ext/nokogiri/extconf.rb, line 324
# Runs the block with the working directory set to the build directory.
#
# When using rake-compiler-dock on Windows, the underlying Virtualbox shared
# folders don't support symlinks, but libiconv expects them for a build on
# Linux. As a workaround the build happens in "/tmp" whenever
# RCD_HOST_RUBY_PLATFORM names a Windows host; otherwise it stays in ".".
#
# @yield the work to perform inside the build directory
# @return whatever Dir.chdir returns for the given block
def chdir_for_build(&block)
  host_platform = ENV["RCD_HOST_RUBY_PLATFORM"].to_s
  windows_host = host_platform.match?(/mingw|mswin|cygwin/)
  Dir.chdir(windows_host ? "/tmp" : ".", &block)
end
# File ext/nokogiri/extconf.rb, line 224
# Joins flag fragments into one space-separated string, skipping nil entries.
#
# @param args [Array<String, nil>] flag fragments
# @return [String] the non-nil fragments separated by single spaces
def concat_flags(*args)
  present = args.reject(&:nil?)
  present.join(" ")
end

utility functions

# File ext/nokogiri/extconf.rb, line 169
# Reads the "clean" enable/disable configure switch, which is on unless the
# user overrides it.
#
# @return the value reported by enable_config
def config_clean?
  enabled_by_default = true
  enable_config("clean", enabled_by_default)
end
# File ext/nokogiri/extconf.rb, line 178
# Reads the "cross-build" enable/disable configure switch; no explicit
# default is supplied, so enable_config's own default applies.
#
# @return the value reported by enable_config
def config_cross_build?
  switch = "cross-build"
  enable_config(switch)
end
# File ext/nokogiri/extconf.rb, line 173
# Reads the "static" enable/disable configure switch. Static linking is the
# default everywhere except on TruffleRuby.
#
# @return the value reported by enable_config
def config_static?
  enable_config("static", !truffle?)
end
# File ext/nokogiri/extconf.rb, line 182
# Decides whether to build against system libraries. The
# "system-libraries" enable/disable switch is consulted first; the default
# is derived from the presence of NOKOGIRI_USE_SYSTEM_LIBRARIES in the
# environment, and the block falls back to the "--use-system-libraries"
# argument.
#
# @return the value reported by enable_config
def config_system_libraries?
  env_default = ENV.key?("NOKOGIRI_USE_SYSTEM_LIBRARIES")
  enable_config("system-libraries", env_default) do |_name, fallback|
    arg_config("--use-system-libraries", fallback)
  end
end
# File ext/nokogiri/extconf.rb, line 188
# Reads the "xml2-legacy" enable/disable configure switch, which is on
# unless the user overrides it.
#
# @return the value reported by enable_config
def config_with_xml2_legacy?
  enabled_by_default = true
  enable_config("xml2-legacy", enabled_by_default)
end
# File ext/nokogiri/extconf.rb, line 557
# Replaces +to_path+ with a fresh directory containing the contents of every
# recipe's "include" directory.
#
# Any existing +to_path+ is removed first, so stale headers never survive.
# The directory is recreated with mkdir_p so that a destination whose parent
# directories do not exist yet is created rather than raising Errno::ENOENT.
#
# @param to_path [String] destination directory (wiped and recreated)
# @param from_recipes [Enumerable] objects responding to #path
# @return [Enumerable] +from_recipes+, as returned by #each
def copy_packaged_libraries_headers(to_path:, from_recipes:)
  FileUtils.rm_rf(to_path, secure: true)
  FileUtils.mkdir_p(to_path)
  from_recipes.each do |recipe|
    headers = Dir[File.join(recipe.path, "include/*")]
    FileUtils.cp_r(headers, to_path)
  end
end
# File ext/nokogiri/extconf.rb, line 200
# @return [Boolean] true when RbConfig's "target_os" contains "darwin"
def darwin?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("darwin")
end
# File ext/nokogiri/extconf.rb, line 570
# Removes files that are only needed while building, then exits the process
# immediately with status 0. Nothing is removed when PACKAGE_ROOT_DIR
# contains a ".git" entry (a development work tree).
#
# @return never returns; the process ends via exit!(0)
def do_clean
  root = Pathname(PACKAGE_ROOT_DIR)
  pwd  = Pathname(Dir.pwd)

  development_tree = root.join(".git").exist?

  unless development_tree
    message("Cleaning files only used during build.\n")

    # (root + 'tmp') cannot be removed at this stage because
    # nokogiri.so is yet to be copied to lib.

    # clean the ports build directory
    Pathname.glob(pwd.join("tmp", "*", "ports")).each do |build_dir|
      FileUtils.rm_rf(build_dir, verbose: true)
    end

    # When statically linked the whole ports installation can go; otherwise
    # only the downloaded archives are removed.
    removable = config_static? ? root.join("ports") : root.join("ports", "archives")
    FileUtils.rm_rf(removable, verbose: true)
  end

  exit!(0)
end
# File ext/nokogiri/extconf.rb, line 565
# Prints NOKOGIRI_HELP_MESSAGE to standard output and exits the process
# immediately with status 0.
#
# @return never returns; the process ends via exit!(0)
def do_help
  $stdout.print(NOKOGIRI_HELP_MESSAGE)
  exit!(0)
end
# File ext/nokogiri/extconf.rb, line 308
# Requires that +func+ can be found via have_func; aborts the build
# otherwise.
#
# @param func [String] function name to probe for
# @param headers [String, Array<String>, nil] headers passed through to have_func
# @return the truthy result of have_func (aborts when it is falsy)
def ensure_func(func, headers = nil)
  found = have_func(func, headers)
  found || abort_could_not_find_library(func)
end
# File ext/nokogiri/extconf.rb, line 303
# Same probe as have_package_configuration, but aborts the build (naming
# +lib+) when the library cannot be configured.
#
# All keyword arguments are forwarded unchanged to
# have_package_configuration.
#
# @return the truthy result of have_package_configuration
def ensure_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  configured = have_package_configuration(opt: opt, pc: pc, lib: lib, func: func, headers: headers)
  return configured if configured

  abort_could_not_find_library(lib)
end
# File ext/nokogiri/extconf.rb, line 242
# Base URL of the GNOME download server.
#
# NOTE(review): presumably used to build download URLs for GNOME-hosted
# libraries such as libxml2/libxslt — the callers are not visible here.
#
# @return [String] "https://download.gnome.org"
def gnome_source
  "https://download.gnome.org"
end
# File ext/nokogiri/extconf.rb, line 361
# Checks, via the C preprocessor (try_cpp), that the libxml2 headers are
# available and optionally that they are at least +version+.
#
# The dotted version is converted to the integer form compared against
# LIBXML_VERSION: major, then minor and micro as two digits each
# ("2.9.14" => 20914). Missing minor/micro components count as zero
# ("2.9" => 20900) and components are parsed as base-10 integers, so a
# zero-padded component such as "08" is accepted instead of raising.
#
# @param version [String, nil] minimum dotted version, or nil for a plain
#   header presence check
# @return the result of try_cpp for the generated source
# @raise [ArgumentError] when a version component is not a decimal integer
def have_libxml_headers?(version = nil)
  source = if version.nil?
    <<~SRC
      #include <libxml/xmlversion.h>
    SRC
  else
    major, minor, micro = version.split(".").first(3).map { |part| Integer(part, 10) }
    version_int = format("%d%2.2d%2.2d", major, minor || 0, micro || 0)
    <<~SRC
      #include <libxml/xmlversion.h>
      #if LIBXML_VERSION < #{version_int}
      #  error libxml2 is older than #{version}
      #endif
    SRC
  end

  try_cpp(source)
end

set up mkmf to link against the library if we can find it

# File ext/nokogiri/extconf.rb, line 286
# Sets up mkmf to link against a library if it can be found.
#
# Steps, in order:
# 1. when +opt+ is given, register dir_config for it and for "opt";
# 2. unless NOKOGIRI_TEST_PKG_CONFIG is set, try linking with the path
#    information already available and return true on success;
# 3. when +pc+ is given, consult package configuration for it;
# 4. finally verify that compiling and linking against the library works.
#
# @param opt [String, nil] dir_config name
# @param pc [String, nil] package-config module name
# @param lib [String] library name
# @param func [String] function to probe for
# @param headers [String, Array<String>] headers used by the probe
# @return true, or the result of the final local_have_library probe
def have_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  [opt, "opt"].each { |name| dir_config(name) } if opt

  # see if we have enough path info to do this without trying any harder
  probe_first = !ENV.key?("NOKOGIRI_TEST_PKG_CONFIG")
  return true if probe_first && local_have_library(lib, func, headers)

  try_package_configuration(pc) if pc

  # verify that we can compile and link against the library
  local_have_library(lib, func, headers)
end
# File ext/nokogiri/extconf.rb, line 400
# Works out the configure flags needed to build against iconv.
#
# Probes are attempted in this order, returning at the first success:
# 1. directories supplied for the "iconv" and then "opt" dir_config targets;
# 2. a plain link attempt with no extra flags;
# 3. pkg_config("libiconv").
# If none succeeds the build is aborted via abort_could_not_find_library.
#
# NOTE(review): try_link_iconv is defined outside this view; it is assumed
# to return truthy when a test program links against iconv after running the
# optional block — confirm against its definition.
#
# @return [Array<String>] flags beginning with "--with-iconv=yes", followed by
#   CPPFLAGS/LDFLAGS (and LIBS for the pkg_config case) when applicable
def iconv_configure_flags
  # give --with-iconv-dir and --with-opt-dir first priority
  ["iconv", "opt"].each do |target|
    # Ask for the configured directories inside preserving_globals, so this
    # lookup's changes to the mkmf globals are rolled back afterwards.
    config = preserving_globals { dir_config(target) }
    next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) }

    # Each entry of config may be nil, a single string, or a list; entries are
    # split on the platform path separator. A nil entry stays nil.
    idirs, ldirs = config.map do |dirs|
      Array(dirs).flat_map do |dir|
        dir.split(File::PATH_SEPARATOR)
      end if dirs
    end

    # Returns from the method itself (not just the block); the splat of a nil
    # conditional contributes nothing when a directory list is absent.
    return [
      "--with-iconv=yes",
      *("CPPFLAGS=#{idirs.map { |dir| "-I" + dir }.join(" ")}" if idirs),
      *("LDFLAGS=#{ldirs.map { |dir| "-L" + dir }.join(" ")}" if ldirs),
    ]
  end

  # No explicit directories worked: see whether iconv links with no extra flags.
  if try_link_iconv
    return ["--with-iconv=yes"]
  end

  # Last resort: pkg_config, again queried without leaving global changes behind.
  config = preserving_globals { pkg_config("libiconv") }
  if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
    cflags, ldflags, libs = config

    return [
      "--with-iconv=yes",
      "CPPFLAGS=#{cflags}",
      "LDFLAGS=#{ldflags}",
      "LIBS=#{libs}",
    ]
  end

  abort_could_not_find_library("libiconv")
end
# File ext/nokogiri/extconf.rb, line 354
# Converts a "-lNAME" linker flag into the matching library filename,
# "libNAME.<$LIBEXT>".
#
# @param ldflag [String] linker flag
# @return [String, nil] the filename, or nil when the flag is not of the
#   "-lNAME" form
def libflag_to_filename(ldflag)
  # Regexp#=== is what `case/when` uses: it yields false rather than raising
  # for values that are not strings.
  return unless /\A-l(.+)/ === ldflag

  library = Regexp.last_match(1)
  "lib#{library}.#{$LIBEXT}"
end
# File ext/nokogiri/extconf.rb, line 228
# Probes for a library under its plain name and, failing that, under the
# same name with a "lib" prefix.
#
# @param lib [String] library name
# @param func [String, nil] function to look for, passed to have_library
# @param headers [String, Array<String>, nil] headers passed to have_library
# @return the result of the first successful have_library call, otherwise
#   the result of the prefixed attempt
def local_have_library(lib, func = nil, headers = nil)
  plain = have_library(lib, func, headers)
  plain || have_library("lib#{lib}", func, headers)
end

In Ruby 3.2, symbol resolution changed on Darwin to introduce the `-bundle_loader` flag, which resolves symbols against the ruby binary.

This makes it challenging to build a single extension that works with both a ruby built with `--enable-shared` and one built with `--disable-shared`. To work around that, we choose to add `-flat_namespace` to the link line (later in this file).

The `-flat_namespace` line introduces its own behavior change, which is that (similar to on Linux), any symbols in the extension that are exported may now be resolved by shared libraries loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2, and that messes with our assumptions about whether we're running with a patched libxml2 or a vanilla libxml2.

We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other words, when we use `-load_hidden`, what happens in the extension stays in the extension.

See github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.

Anyway, this method is the logical bit to tell us when to turn on these workarounds.

# File ext/nokogiri/extconf.rb, line 619
# Tells whether the Darwin linker workarounds should be switched on: only
# for cross builds targeting Darwin where RbConfig's "ruby_version" (with
# anything after a "+" dropped) satisfies "~> 3.2".
#
# NOTE(review): "~> 3.2" means >= 3.2 and < 4.0, so a 4.x ruby_version does
# not enable the workaround — confirm that is intended.
#
# @return a truthy value when the workaround is needed, otherwise the first
#   falsy condition
def needs_darwin_linker_hack
  # Evaluated lazily so RbConfig is only consulted once the cheaper checks pass.
  ruby_in_3_2_series = lambda do
    abi_version = RbConfig::CONFIG["ruby_version"].split("+").first
    Gem::Requirement.new("~> 3.2").satisfied_by?(Gem::Version.new(abi_version))
  end

  config_cross_build? && darwin? && ruby_in_3_2_series.call
end
# File ext/nokogiri/extconf.rb, line 216
# @return [Boolean] true when the NIX_CC environment variable is set
def nix?
  ENV.include?("NIX_CC")
end
# File ext/nokogiri/extconf.rb, line 204
# @return [Boolean] true when RbConfig's "target_os" contains "openbsd"
def openbsd?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("openbsd")
end
# File ext/nokogiri/extconf.rb, line 312
# Runs the block and then resets the listed global build variables
# ($arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH,
# $libs) to shallow copies taken before the block ran, even if the block
# raises.
#
# @yield code that may modify those globals
# @return the block's return value
def preserving_globals
  begin
    # Shallow copies: in-place edits to the original objects are discarded
    # when the copies are put back.
    snapshot = [$arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs].map(&:dup)
    yield
  ensure
    $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs = snapshot
  end
end
# File ext/nokogiri/extconf.rb, line 438
# Creates, configures, builds (when needed) and activates a MiniPortile
# recipe for a packaged library.
#
# The caller's block receives the recipe to set files, patches and extra
# configure options. Afterwards this method folds "KEY=value" configure
# options into an environment map, adds static/shared and cross-compilation
# flags, and cooks the recipe unless a checkpoint file shows it was already
# installed.
#
# @param name [String] library name
# @param version [String] library version
# @param static_p [Boolean] build a static (true) or shared (false) library
# @param cross_p [Boolean] add --target/--host options for cross-compiling
# @param cacheable_p [Boolean] when true, install under PACKAGE_ROOT_DIR/ports
# @yield [recipe] the MiniPortile instance, before options are post-processed
# @return the MiniPortile recipe (the receiver of #tap)
def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
  require "rubygems"
  gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
  require "mini_portile2"
  message("Using mini_portile version #{MiniPortile::VERSION}\n")

  # Record the version of every library other than libxml2 and libxslt.
  unless ["libxml2", "libxslt"].include?(name)
    OTHER_LIBRARY_VERSIONS[name] = version
  end

  MiniPortile.new(name, version).tap do |recipe|
    # Singleton override on this recipe only: the port path includes RUBY_PLATFORM.
    def recipe.port_path
      "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
    end

    # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
    # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
    recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
    recipe.host = recipe.host.gsub("i386", "i686")

    recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
    recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"

    # Let the caller customize the recipe before options are post-processed.
    yield recipe

    # Environment map whose missing keys are filled in lazily from ENV (as a
    # string, so an unset variable reads as "").
    env = Hash.new do |hash, key|
      hash[key] = (ENV[key]).to_s
    end

    recipe.configure_options.flatten!

    # Move every "KEY=value" option out of configure_options and into env.
    # Note that key? does not trigger the default block: the first value seen
    # for a key is stored as-is, and only later values for the same key are
    # appended with concat_flags.
    recipe.configure_options.delete_if do |option|
      case option
      when /\A(\w+)=(.*)\z/
        env[Regexp.last_match(1)] = if env.key?(Regexp.last_match(1))
          concat_flags(env[Regexp.last_match(1)], Regexp.last_match(2))
        else
          Regexp.last_match(2)
        end
        true
      else
        false
      end
    end

    if static_p
      recipe.configure_options += [
        "--disable-shared",
        "--enable-static",
      ]
      # Static builds additionally compile with -fPIC.
      env["CFLAGS"] = concat_flags(env["CFLAGS"], "-fPIC")
    else
      recipe.configure_options += [
        "--enable-shared",
        "--disable-static",
      ]
    end

    if cross_p
      recipe.configure_options += [
        "--target=#{recipe.host}",
        "--host=#{recipe.host}",
      ]
    end

    # For a "universal" target CPU, append ARCH_FLAG to CFLAGS and LDFLAGS
    # unless they already mention -arch.
    if RbConfig::CONFIG["target_cpu"] == "universal"
      ["CFLAGS", "LDFLAGS"].each do |key|
        unless env[key].include?("-arch")
          env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"])
        end
      end
    end

    # Re-append the collected environment as KEY=value configure options.
    recipe.configure_options += env.map do |key, value|
      "#{key}=#{value.strip}"
    end

    # The checkpoint file marks a completed build for this name, version and
    # platform. The build is skipped only when it exists and the recipe has no
    # source_directory set.
    checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed"
    if File.exist?(checkpoint) && !recipe.source_directory
      message("Building Nokogiri with a packaged version of #{name}-#{version}.\n")
    else
      message(<<~EOM)
        ---------- IMPORTANT NOTICE ----------
        Building Nokogiri with a packaged version of #{name}-#{version}.
        Configuration options: #{recipe.configure_options.shelljoin}
      EOM

      unless recipe.patch_files.empty?
        message("The following patches are being applied:\n")

        recipe.patch_files.each do |patch|
          message(format("  - %s\n", File.basename(patch)))
        end
      end

      message(<<~EOM) if name != "libgumbo"

        The Nokogiri maintainers intend to provide timely security updates, but if
        this is a concern for you and want to use your OS/distro system library
        instead, then abort this installation process and install nokogiri as
        instructed at:

          https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries

      EOM

      message(<<~EOM) if name == "libxml2"
        Note, however, that nokogiri cannot guarantee compatibility with every
        version of libxml2 that may be provided by OS/package vendors.

      EOM

      # Build inside the directory chosen by chdir_for_build, then record success.
      chdir_for_build { recipe.cook }
      FileUtils.touch(checkpoint)
    end
    recipe.activate
  end
end
# File bin/nokogiri, line 43
# Reads the content named by an HTTP(S) URL, a file:// URI, or a plain
# filesystem path.
#
# A local path is not necessarily a valid URI (for example one containing a
# space), and URI.parse raises URI::InvalidURIError for those. Such input is
# treated as a plain path instead of letting the error escape.
#
# NOTE(review): URI::HTTP#read comes from open-uri, which is assumed to be
# required by the surrounding script — it is not required here.
#
# @param uri_or_path [String] URL, file URI, or filesystem path
# @return [String] the content that was read
# @raise [SystemCallError] when a local file cannot be read
def safe_read(uri_or_path)
  uri = begin
    URI.parse(uri_or_path)
  rescue URI::InvalidURIError
    nil # not a URI at all: fall through to File.read below
  end

  case uri
  when URI::HTTP
    uri.read
  when URI::File
    File.read(uri.path)
  else
    File.read(uri_or_path)
  end
end
# File ext/nokogiri/extconf.rb, line 332
# Rewrites a Windows drive path into an sh-compatible one; on other
# platforms the path is returned untouched.
#
# Because libxslt 1.1.29 configure.in uses AC_PATH_TOOL, which treats ":"
# as a $PATH separator, Windows paths have to be converted from
#
#   C:/path/to/foo
#
# to
#
#   /C/path/to/foo
#
# so that things are found properly during configuration.
#
# @param path [String] path to convert
# @return [String] the converted path, or +path+ itself when not on Windows
#   or when it does not start with an uppercase drive letter followed by ":/"
def sh_export_path(path)
  return path unless windows?

  drive, remainder = %r{^([A-Z]):(/.*)}.match(path)&.captures
  drive ? File.join("/", drive, remainder) : path
end
# File ext/nokogiri/extconf.rb, line 196
# @return [Boolean] true when RbConfig's "target_os" contains "solaris"
def solaris?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("solaris")
end
# File ext/nokogiri/extconf.rb, line 220
# @return [Boolean] true when the running interpreter reports itself as
#   "truffleruby" through RUBY_ENGINE
def truffle?
  engine = RUBY_ENGINE
  engine == "truffleruby"
end

wrapper around MakeMakefile#pkg_config and the PKGConfig gem

# File ext/nokogiri/extconf.rb, line 252
# Looks up package configuration for +pc+, first with
# MakeMakefile#pkg_config and then, if that finds nothing, with the
# pkg-config gem.
#
# Setting NOKOGIRI_TEST_PKG_CONFIG_GEM in the environment skips the first
# attempt and goes straight to the gem.
#
# @param pc [String] package-config module name
# @return nil when the system utility succeeded; otherwise the result of the
#   gem-based check, or of the install hint when the gem cannot be loaded
def try_package_configuration(pc)
  system_tool_allowed = !ENV.key?("NOKOGIRI_TEST_PKG_CONFIG_GEM")
  if system_tool_allowed
    # try MakeMakefile#pkg_config, which uses the system utility `pkg-config`.
    found = checking_for("#{pc} using `pkg_config`", LOCAL_PACKAGE_RESPONSE) { pkg_config(pc) }
    return if found
  end

  # `pkg-config` probably isn't installed, which appears to be the case for lots of freebsd systems.
  # let's fall back to the pkg-config gem, which knows how to parse .pc files, and wrap it with the
  # same logic as MakeMakefile#pkg_config
  begin
    require "rubygems"
    gem("pkg-config", REQUIRED_PKG_CONFIG_VERSION)
    require "pkg-config"

    checking_for("#{pc} using pkg-config gem version #{PKGConfig::VERSION}", LOCAL_PACKAGE_RESPONSE) do
      next unless PKGConfig.have_package(pc)

      flags = [PKGConfig.cflags(pc), PKGConfig.libs_only_L(pc), PKGConfig.libs_only_l(pc)]

      Logging.message("pkg-config gem found package configuration for %s\n", pc)
      Logging.message("cflags: %s\nldflags: %s\nlibs: %s\n\n", *flags)

      flags
    end
  rescue LoadError
    message("Please install either the `pkg-config` utility or the `pkg-config` rubygem.\n")
  end
end
# File ext/nokogiri/extconf.rb, line 212
# True for every target that is not Windows, Solaris or Darwin (so, as far
# as this check is concerned, AIX and OpenBSD targets count as unix too).
#
# @return [Boolean]
def unix?
  !windows? && !solaris? && !darwin?
end
# File ext/nokogiri/extconf.rb, line 192
# @return [Boolean] true when RbConfig's "target_os" mentions mingw or mswin
def windows?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.match?(/mingw|mswin/)
end
# File ext/nokogiri/extconf.rb, line 232
# Download URL for the zlib tarball of the given version.
#
# As of 2022-12, downloads from zlib.net were failing often enough that
# GitHub became the default; set NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE in the
# environment to use zlib.net instead.
#
# @param version_string [String] zlib version, e.g. "1.3.1"
# @return [String] tarball URL
def zlib_source(version_string)
  tarball = "zlib-#{version_string}.tar.gz"
  return "https://zlib.net/fossils/#{tarball}" if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]

  "https://github.com/madler/zlib/releases/download/v#{version_string}/#{tarball}"
end