class Object
Constants
- LOCAL_PACKAGE_RESPONSE
- NOKOGIRI_HELP_MESSAGE
- OTHER_LIBRARY_VERSIONS
-
Keep track of what versions of what libraries we build against
- PACKAGE_ROOT_DIR
-
helpful constants
- RECOMMENDED_LIBXML_VERSION
- REQUIRED_LIBXML_VERSION
- REQUIRED_MINI_PORTILE_VERSION
- REQUIRED_PKG_CONFIG_VERSION
Public Instance Methods
Parse a document contained in +args+. Nokogiri will try to guess what type of document you are attempting to parse. For more information, see Nokogiri.parse
To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
# File lib/nokogiri.rb, line 108
#
# Convenience entry point. Without a block, every argument is forwarded to
# Nokogiri.parse. With a block, the block is handed to
# Nokogiri::HTML4::Builder and the root of the built document is returned.
def Nokogiri(*args, &block)
  return Nokogiri.parse(*args) unless block

  Nokogiri::HTML4::Builder.new(&block).doc.root
end
# File ext/nokogiri/extconf.rb, line 319
#
# Aborts the process with a banner naming the missing library +lib+ and the
# two nearest call sites, pointing the user at mkmf.log.
def abort_could_not_find_library(lib)
  # Must stay at method level: the frame range is relative to this method.
  call_sites = caller(1..2).join("\n")
  banner = [
    "-----",
    call_sites,
    "#{lib} is missing. Please locate mkmf.log to investigate how it is failing.",
    "-----",
  ].join("\n")
  abort(banner)
end
# File ext/nokogiri/extconf.rb, line 208
#
# True when the target OS reported by RbConfig contains "aix".
def aix?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("aix")
end
# File ext/nokogiri/extconf.rb, line 324
#
# Runs the given block with the working directory switched to the build
# directory: "/tmp" when RCD_HOST_RUBY_PLATFORM names a Windows-like host,
# otherwise the current directory.
def chdir_for_build(&block)
  # When using rake-compiler-dock on Windows, the underlying Virtualbox shared
  # folders don't support symlinks, but libiconv expects it for a build on
  # Linux. We work around this limitation by using the temp dir for cooking.
  host_platform = ENV["RCD_HOST_RUBY_PLATFORM"].to_s
  build_dir =
    if host_platform.match?(/mingw|mswin|cygwin/)
      "/tmp"
    else
      "."
    end
  Dir.chdir(build_dir, &block)
end
# File ext/nokogiri/extconf.rb, line 224
#
# Joins the given flag strings with single spaces, skipping nil entries.
def concat_flags(*args)
  present = args.reject(&:nil?)
  present.join(" ")
end
utility functions
# File ext/nokogiri/extconf.rb, line 169 def config_clean? enable_config("clean", true) end
# File ext/nokogiri/extconf.rb, line 178 def config_cross_build? enable_config("cross-build") end
# File ext/nokogiri/extconf.rb, line 173
#
# Result of the "static" configure switch. The default is static linking on
# every engine except TruffleRuby.
def config_static?
  enable_config("static", !truffle?)
end
# File ext/nokogiri/extconf.rb, line 182
#
# Result of the "system-libraries" configure switch. When enable_config yields
# to the block, the --use-system-libraries argument is consulted instead. The
# default in both cases is whether NOKOGIRI_USE_SYSTEM_LIBRARIES is set.
def config_system_libraries?
  requested_via_env = ENV.key?("NOKOGIRI_USE_SYSTEM_LIBRARIES")
  enable_config("system-libraries", requested_via_env) do |_name, fallback|
    arg_config("--use-system-libraries", fallback)
  end
end
# File ext/nokogiri/extconf.rb, line 188 def config_with_xml2_legacy? enable_config("xml2-legacy", true) end
# File ext/nokogiri/extconf.rb, line 557
#
# Replaces +to_path+ with a fresh directory containing the contents of every
# recipe's "include" directory.
#
# to_path::      destination directory; any existing content is removed first.
# from_recipes:: objects responding to +path+ (the recipe install prefix).
def copy_packaged_libraries_headers(to_path:, from_recipes:)
  FileUtils.rm_rf(to_path, secure: true)
  # mkdir_p rather than mkdir, so a destination whose parent directories do
  # not exist yet is created instead of raising Errno::ENOENT.
  FileUtils.mkdir_p(to_path)
  from_recipes.each do |recipe|
    FileUtils.cp_r(Dir[File.join(recipe.path, "include/*")], to_path)
  end
end
# File ext/nokogiri/extconf.rb, line 200
#
# True when the target OS reported by RbConfig contains "darwin".
def darwin?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("darwin")
end
# File ext/nokogiri/extconf.rb, line 570
#
# Removes build-only files and then terminates the process with exit!(0).
# Nothing is removed when PACKAGE_ROOT_DIR contains a ".git" entry.
def do_clean
  package_root = Pathname(PACKAGE_ROOT_DIR)
  working_dir = Pathname(Dir.pwd)

  # Skip if this is a development work tree
  development_tree = package_root.join(".git").exist?

  unless development_tree
    message("Cleaning files only used during build.\n")

    # (root + 'tmp') cannot be removed at this stage because
    # nokogiri.so is yet to be copied to lib.

    # clean the ports build directory
    Pathname.glob(working_dir.join("tmp", "*", "ports")).each do |ports_build_dir|
      FileUtils.rm_rf(ports_build_dir, verbose: true)
    end

    leftovers =
      if config_static?
        # ports installation can be safely removed if statically linked.
        package_root + "ports"
      else
        package_root + "ports" + "archives"
      end
    FileUtils.rm_rf(leftovers, verbose: true)
  end

  exit!(0)
end
# File ext/nokogiri/extconf.rb, line 565 def do_help print(NOKOGIRI_HELP_MESSAGE) exit!(0) end
# File ext/nokogiri/extconf.rb, line 308
#
# Returns the result of have_func for +func+ when it is truthy; otherwise
# aborts through abort_could_not_find_library, naming +func+.
def ensure_func(func, headers = nil)
  available = have_func(func, headers)
  return available if available

  abort_could_not_find_library(func)
end
# File ext/nokogiri/extconf.rb, line 303
#
# Same contract as have_package_configuration, except that a falsy result
# aborts through abort_could_not_find_library, naming +lib+.
def ensure_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  configured = have_package_configuration(opt: opt, pc: pc, lib: lib, func: func, headers: headers)
  return configured if configured

  abort_could_not_find_library(lib)
end
# File ext/nokogiri/extconf.rb, line 242 def gnome_source "https://download.gnome.org" end
# File ext/nokogiri/extconf.rb, line 361
#
# Runs the preprocessor (try_cpp) on a snippet that includes
# <libxml/xmlversion.h>. When +version+ (a dotted "major.minor.patch" string)
# is given, the snippet also raises a preprocessor error if LIBXML_VERSION is
# lower than that version.
def have_libxml_headers?(version = nil)
  return try_cpp(<<~SRC) if version.nil?
    #include <libxml/xmlversion.h>
  SRC

  # "2.9.10" -> "20910", the integer layout compared against LIBXML_VERSION.
  version_int = format("%d%2.2d%2.2d", *version.split("."))
  try_cpp(<<~SRC)
    #include <libxml/xmlversion.h>
    #if LIBXML_VERSION < #{version_int}
    # error libxml2 is older than #{version}
    #endif
  SRC
end
set up mkmf to link against the library if we can find it
# File ext/nokogiri/extconf.rb, line 286
#
# Tries to make +lib+ linkable, returning the result of the final
# local_have_library probe (or true when the first probe already succeeds).
#
# opt::     name passed to dir_config (followed by the generic "opt"), if given.
# pc::      pkg-config package name handed to try_package_configuration, if given.
# lib::     library name to link against.
# func::    function expected to be found in the library.
# headers:: header(s) used for the probe.
def have_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  [opt, "opt"].each { |target| dir_config(target) } if opt

  # see if we have enough path info to do this without trying any harder
  # (NOKOGIRI_TEST_PKG_CONFIG skips this shortcut)
  probe_directly = !ENV.key?("NOKOGIRI_TEST_PKG_CONFIG")
  return true if probe_directly && local_have_library(lib, func, headers)

  if pc
    try_package_configuration(pc)
  end

  # verify that we can compile and link against the library
  local_have_library(lib, func, headers)
end
# File ext/nokogiri/extconf.rb, line 400 def iconv_configure_flags # give --with-iconv-dir and --with-opt-dir first priority ["iconv", "opt"].each do |target| config = preserving_globals { dir_config(target) } next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) } idirs, ldirs = config.map do |dirs| Array(dirs).flat_map do |dir| dir.split(File::PATH_SEPARATOR) end if dirs end return [ "--with-iconv=yes", *("CPPFLAGS=#{idirs.map { |dir| "-I" + dir }.join(" ")}" if idirs), *("LDFLAGS=#{ldirs.map { |dir| "-L" + dir }.join(" ")}" if ldirs), ] end if try_link_iconv return ["--with-iconv=yes"] end config = preserving_globals { pkg_config("libiconv") } if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") } cflags, ldflags, libs = config return [ "--with-iconv=yes", "CPPFLAGS=#{cflags}", "LDFLAGS=#{ldflags}", "LIBS=#{libs}", ] end abort_could_not_find_library("libiconv") end
# File ext/nokogiri/extconf.rb, line 354
#
# Maps a linker flag of the form "-lNAME" to the file name
# "libNAME.<$LIBEXT>". Returns nil for anything that is not a "-l" flag.
def libflag_to_filename(ldflag)
  case ldflag
  when /\A-l(?<libname>.+)/
    libname = Regexp.last_match(:libname)
    "lib#{libname}.#{$LIBEXT}"
  end
end
# File ext/nokogiri/extconf.rb, line 228
#
# Probes for +lib+ with have_library, retrying under the name "lib<lib>" when
# the first probe fails. Returns the result of whichever probe ran last.
def local_have_library(lib, func = nil, headers = nil)
  found = have_library(lib, func, headers)
  return found if found

  have_library("lib#{lib}", func, headers)
end
In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to resolve symbols against the ruby binary.
This makes it challenging to build a single extension that works with both a ruby with `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add `-flat_namespace` to the link line (later in this file).
The `-flat_namespace` line introduces its own behavior change, which is that (similar to on Linux), any symbols in the extension that are exported may now be resolved by shared libraries loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2, and that messes with our assumptions about whether we’re running with a patched libxml2 or a vanilla libxml2.
We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other words, when we use `load_hidden`, what happens in the extension stays in the extension.
See github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.
Anyway, this method is the logical bit to tell us when to turn on these workarounds.
# File ext/nokogiri/extconf.rb, line 619
#
# Truthy only for a cross-build that targets Darwin with a Ruby whose
# "ruby_version" satisfies "~> 3.2". When either of the first two conditions
# is falsy, that falsy value itself is returned.
def needs_darwin_linker_hack
  cross_darwin = config_cross_build? && darwin?
  return cross_darwin unless cross_darwin

  # Strip any "+suffix" from the version (e.g. development builds) before parsing.
  ruby_version = Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first)
  Gem::Requirement.new("~> 3.2").satisfied_by?(ruby_version)
end
# File ext/nokogiri/extconf.rb, line 216
#
# True when the NIX_CC environment variable is set (to any value).
def nix?
  ENV.include?("NIX_CC")
end
# File ext/nokogiri/extconf.rb, line 204
#
# True when the target OS reported by RbConfig contains "openbsd".
def openbsd?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("openbsd")
end
# File ext/nokogiri/extconf.rb, line 312
#
# Yields to the block and afterwards restores the listed build globals to
# copies taken before the block ran, even when the block raises. Returns the
# block's value.
def preserving_globals
  snapshot = [
    $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS,
    $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs,
  ].map { |value| value.dup }
  yield
ensure
  $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs = snapshot
end
# File ext/nokogiri/extconf.rb, line 438
#
# Builds (or reuses) a packaged library through MiniPortile and activates it.
#
# name::        library name, used for the recipe and in the messages.
# version::     library version.
# static_p::    truthy to configure a static build (adds -fPIC to CFLAGS),
#               falsy for a shared build.
# cross_p::     truthy to pass --target/--host for cross-compilation.
# cacheable_p:: truthy to place the recipe under PACKAGE_ROOT_DIR/ports.
#
# The recipe is yielded to the caller's block for customization before the
# configure options are finalized. Returns the recipe (the receiver of #tap).
#
# NOTE(review): the line breaks inside the heredoc messages were reconstructed
# from a collapsed listing — confirm them against the real extconf.rb.
def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
  require "rubygems"
  gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
  require "mini_portile2"
  message("Using mini_portile version #{MiniPortile::VERSION}\n")

  # Record versions of every library other than libxml2 and libxslt.
  unless ["libxml2", "libxslt"].include?(name)
    OTHER_LIBRARY_VERSIONS[name] = version
  end

  MiniPortile.new(name, version).tap do |recipe|
    # Singleton override so the port path includes RUBY_PLATFORM.
    def recipe.port_path
      "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
    end

    # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
    # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
    recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
    recipe.host = recipe.host.gsub("i386", "i686")

    recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
    recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"

    yield recipe

    # Lazily seeded from the process environment: reading a missing key
    # stores ENV[key] (or "" when unset) under that key.
    env = Hash.new do |hash, key|
      hash[key] = (ENV[key]).to_s
    end

    recipe.configure_options.flatten!

    # Move every VAR=value option out of configure_options and into env.
    # Note that key? does not trigger the default block, so the first value
    # seen for a variable replaces, rather than extends, the ENV value.
    recipe.configure_options.delete_if do |option|
      case option
      when /\A(\w+)=(.*)\z/
        env[Regexp.last_match(1)] = if env.key?(Regexp.last_match(1))
          concat_flags(env[Regexp.last_match(1)], Regexp.last_match(2))
        else
          Regexp.last_match(2)
        end
        true
      else
        false
      end
    end

    if static_p
      recipe.configure_options += [
        "--disable-shared",
        "--enable-static",
      ]
      env["CFLAGS"] = concat_flags(env["CFLAGS"], "-fPIC")
    else
      recipe.configure_options += [
        "--enable-shared",
        "--disable-static",
      ]
    end

    if cross_p
      recipe.configure_options += [
        "--target=#{recipe.host}",
        "--host=#{recipe.host}",
      ]
    end

    # For "universal" targets, add ARCH_FLAG unless an -arch flag is already present.
    if RbConfig::CONFIG["target_cpu"] == "universal"
      ["CFLAGS", "LDFLAGS"].each do |key|
        unless env[key].include?("-arch")
          env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"])
        end
      end
    end

    # Re-append the collected variables as VAR=value configure options.
    recipe.configure_options += env.map do |key, value|
      "#{key}=#{value.strip}"
    end

    # Marker file written after a successful build; when it exists (and the
    # recipe has no source_directory) the build step is skipped.
    checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed"
    if File.exist?(checkpoint) && !recipe.source_directory
      message("Building Nokogiri with a packaged version of #{name}-#{version}.\n")
    else
      message(<<~EOM)
        ---------- IMPORTANT NOTICE ----------
        Building Nokogiri with a packaged version of #{name}-#{version}.
        Configuration options: #{recipe.configure_options.shelljoin}
      EOM

      unless recipe.patch_files.empty?
        message("The following patches are being applied:\n")

        recipe.patch_files.each do |patch|
          message(format(" - %s\n", File.basename(patch)))
        end
      end

      message(<<~EOM) if name != "libgumbo"

        The Nokogiri maintainers intend to provide timely security updates, but if
        this is a concern for you and want to use your OS/distro system library
        instead, then abort this installation process and install nokogiri as
        instructed at:

          https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries

      EOM

      message(<<~EOM) if name == "libxml2"
        Note, however, that nokogiri cannot guarantee compatibility with every
        version of libxml2 that may be provided by OS/package vendors.

      EOM

      chdir_for_build { recipe.cook }
      FileUtils.touch(checkpoint)
    end
    recipe.activate
  end
end
# File bin/nokogiri, line 43
#
# Reads the document named by +uri_or_path+ and returns its contents.
#
# * http(s) URIs are fetched with URI#read (provided by open-uri, which the
#   surrounding script is expected to have loaded).
# * file: URIs are read from the URI's path.
# * Anything else is treated as a local file path.
#
# Raises whatever File.read / URI#read raise (e.g. Errno::ENOENT).
def safe_read(uri_or_path)
  uri =
    begin
      URI.parse(uri_or_path)
    rescue URI::InvalidURIError
      # Ordinary local paths (containing spaces, backslashes, ...) are not
      # valid URIs; fall through to the plain-file branch instead of crashing.
      nil
    end

  case uri
  when URI::HTTP
    uri.read
  when URI::File
    File.read(uri.path)
  else
    File.read(uri_or_path)
  end
end
# File ext/nokogiri/extconf.rb, line 332
#
# Converts a Windows drive path into an sh-compatible one; every other path
# (and every path on non-Windows targets) is returned unchanged.
def sh_export_path(path)
  # because libxslt 1.1.29 configure.in uses AC_PATH_TOOL which treats ":"
  # as a $PATH separator, we need to convert windows paths from
  #
  #   C:/path/to/foo
  #
  # to
  #
  #   /C/path/to/foo
  #
  # which is sh-compatible, in order to find things properly during
  # configuration
  return path unless windows?

  drive_path = %r{^([A-Z]):(/.*)}.match(path)
  return path unless drive_path && drive_path.length == 3

  drive_letter = drive_path[1]
  remainder = drive_path[2]
  File.join("/", drive_letter, remainder)
end
# File ext/nokogiri/extconf.rb, line 196
#
# True when the target OS reported by RbConfig contains "solaris".
def solaris?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("solaris")
end
# File ext/nokogiri/extconf.rb, line 220
#
# True when the running Ruby engine is TruffleRuby.
def truffle?
  engine = RUBY_ENGINE
  engine == "truffleruby"
end
# File ext/nokogiri/extconf.rb, line 379 def try_link_iconv(using = nil) checking_for(using ? "iconv using #{using}" : "iconv") do ["", "-liconv"].any? do |opt| preserving_globals do yield if block_given? try_link(<<~SRC, opt) #include <stdlib.h> #include <iconv.h> int main(void) { iconv_t cd = iconv_open("", ""); iconv(cd, NULL, NULL, NULL, NULL); return EXIT_SUCCESS; } SRC end end end end
wrapper around MakeMakefile#pkg_config and the PKGConfig gem
# File ext/nokogiri/extconf.rb, line 252 def try_package_configuration(pc) unless ENV.key?("NOKOGIRI_TEST_PKG_CONFIG_GEM") # try MakeMakefile#pkg_config, which uses the system utility `pkg-config`. return if checking_for("#{pc} using `pkg_config`", LOCAL_PACKAGE_RESPONSE) do pkg_config(pc) end end # `pkg-config` probably isn't installed, which appears to be the case for lots of freebsd systems. # let's fall back to the pkg-config gem, which knows how to parse .pc files, and wrap it with the # same logic as MakeMakefile#pkg_config begin require "rubygems" gem("pkg-config", REQUIRED_PKG_CONFIG_VERSION) require "pkg-config" checking_for("#{pc} using pkg-config gem version #{PKGConfig::VERSION}", LOCAL_PACKAGE_RESPONSE) do if PKGConfig.have_package(pc) cflags = PKGConfig.cflags(pc) ldflags = PKGConfig.libs_only_L(pc) libs = PKGConfig.libs_only_l(pc) Logging.message("pkg-config gem found package configuration for %s\n", pc) Logging.message("cflags: %s\nldflags: %s\nlibs: %s\n\n", cflags, ldflags, libs) [cflags, ldflags, libs] end end rescue LoadError message("Please install either the `pkg-config` utility or the `pkg-config` rubygem.\n") end end
# File ext/nokogiri/extconf.rb, line 212
#
# True for every target that is neither Windows, Solaris nor Darwin.
def unix?
  !windows? && !solaris? && !darwin?
end
# File ext/nokogiri/extconf.rb, line 192
#
# True when the target OS reported by RbConfig contains "mingw" or "mswin".
def windows?
  target_os = RbConfig::CONFIG["target_os"]
  %w[mingw mswin].any? { |marker| target_os.include?(marker) }
end
# File ext/nokogiri/extconf.rb, line 232
#
# Download URL for the zlib +version_string+ tarball. GitHub releases are the
# default; setting NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE selects zlib.net.
def zlib_source(version_string)
  # As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
  # change the default to github.
  tarball = "zlib-#{version_string}.tar.gz"
  return "https://zlib.net/fossils/#{tarball}" if ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]

  "https://github.com/madler/zlib/releases/download/v#{version_string}/#{tarball}"
end