diff --git a/Gemfile b/Gemfile index 1ea8f2da..6bb9fcb8 100644 --- a/Gemfile +++ b/Gemfile @@ -4,9 +4,7 @@ gemspec gem "redcarpet", :platforms => :ruby gem "kramdown", :platforms => :jruby gem "RedCloth" -# using a tag version here because 0.18.3 was not published by the author to encourage users to upgrade. -# however we want to bump up to this version since this has a security patch -gem "commonmarker", git: "https://github.com/gjtorikian/commonmarker.git", tag: "v0.18.3" +gem "commonmarker", "~> 2.8.2" gem "rdoc", "~> 7.2.0" gem "org-ruby", "0.9.12" gem "creole", "~>0.5.0" diff --git a/Gemfile.lock b/Gemfile.lock index 8d0f756d..cd835695 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,11 +1,3 @@ -GIT - remote: https://github.com/gjtorikian/commonmarker.git - revision: 2838ebaa83ee0081d481c21f3bc0e4cb3e8de9da - tag: v0.18.3 - specs: - commonmarker (0.18.3) - ruby-enum (~> 0.5) - PATH remote: . specs: @@ -34,6 +26,13 @@ GEM builder (3.3.0) cgi (0.5.1) charlock_holmes (0.7.9) + commonmarker (2.8.2) + rb_sys (~> 0.9) + commonmarker (2.8.2-aarch64-linux) + commonmarker (2.8.2-arm-linux) + commonmarker (2.8.2-arm64-darwin) + commonmarker (2.8.2-x86_64-darwin) + commonmarker (2.8.2-x86_64-linux) concurrent-ruby (1.3.6) connection_pool (3.0.2) crass (1.0.6) @@ -85,14 +84,15 @@ GEM stringio racc (1.8.1) rake (13.4.2) + rake-compiler-dock (1.12.0) + rb_sys (0.9.128) + rake-compiler-dock (= 1.12.0) rdoc (7.2.0) erb psych (>= 4.0.0) tsort redcarpet (3.6.1) rexml (3.4.4) - ruby-enum (0.9.0) - i18n rubypants (0.7.1) rugged (1.9.0) sanitize (6.1.3) @@ -135,7 +135,7 @@ DEPENDENCIES RedCloth activesupport (~> 8.1.3) asciidoctor (~> 2.0.26) - commonmarker! + commonmarker (~> 2.8.2) creole (~> 0.5.0) github-linguist (>= 7.1.3) github-markup! 
diff --git a/lib/github/markup/markdown.rb b/lib/github/markup/markdown.rb
index dcf93229..5b17afb7 100644
--- a/lib/github/markup/markdown.rb
+++ b/lib/github/markup/markdown.rb
@@ -5,9 +5,99 @@ module Markup
     class Markdown < Implementation
       MARKDOWN_GEMS = {
         "commonmarker" => proc { |content, options: {}|
-          commonmarker_opts = [:GITHUB_PRE_LANG].concat(options.fetch(:commonmarker_opts, []))
-          commonmarker_exts = options.fetch(:commonmarker_exts, [:tagfilter, :autolink, :table, :strikethrough])
-          CommonMarker.render_html(content, commonmarker_opts, commonmarker_exts)
+          # Adapter from the legacy CommonMarker 0.x symbol arrays to the commonmarker 2.x
+          # options hash. Reads options[:commonmarker_opts] (default: none) and
+          # options[:commonmarker_exts] (default: tagfilter, autolink, table, strikethrough),
+          # returns whatever Commonmarker.to_html returns for +content+, and raises
+          # ArgumentError for any option or extension symbol it does not recognise.
+          legacy_opts = options.fetch(:commonmarker_opts, [])
+          legacy_exts = options.fetch(
+            :commonmarker_exts,
+            [:tagfilter, :autolink, :table, :strikethrough],
+          )
+
+          parse_options = {}
+          # commonmarker 2.x changes several render defaults that diverge from cmark-gfm 0.x:
+          #   - hardbreaks defaults to true in 2.x but was false in 0.x.
+          #   - escaped_char_spans defaults to true in 2.x and wraps backslash-escaped chars in
+          #     <span data-escaped-char>; 0.x emitted bare characters.
+          #   - gfm_quirks defaults to false in 2.x; 0.x (cmark-gfm) always had the quirk on,
+          #     which collapses ****foo**** to <strong>foo</strong> instead of nesting.
+          #   - github_pre_lang defaults to true in 2.x; set explicitly to match the legacy contract.
+          render_options = {
+            github_pre_lang: true,
+            hardbreaks: false,
+            escaped_char_spans: false,
+            gfm_quirks: true,
+          }
+          extension_options = {}
+
+          legacy_opts.each do |opt|
+            case opt
+            when :DEFAULT then nil
+            when :SOURCEPOS then render_options[:sourcepos] = true
+            when :HARDBREAKS then render_options[:hardbreaks] = true
+            # cmark-gfm 0.x combined options into a bitmask, and its HTML renderer tested
+            # HARDBREAKS before NOBREAKS, so HARDBREAKS won no matter how the caller ordered
+            # them. hardbreaks already starts out false above, so treat :NOBREAKS as a no-op
+            # instead of letting it clobber an earlier :HARDBREAKS.
+            when :NOBREAKS then nil
+            when :SMART then parse_options[:smart] = true
+            when :GITHUB_PRE_LANG then render_options[:github_pre_lang] = true
+            when :UNSAFE then render_options[:unsafe] = true
+            when :FOOTNOTES then extension_options[:footnotes] = true
+            when :FULL_INFO_STRING then render_options[:full_info_string] = true
+            # The legacy options below existed in cmark-gfm 0.x but have no direct commonmarker
+            # 2.x equivalent. Accept them so existing callers don't break, but they have no effect:
+            #   :VALIDATE_UTF8 / :LIBERAL_HTML_TAG - enforced at the Rust type layer in 2.x.
+            #   :TABLE_PREFER_STYLE_ATTRIBUTES - no 2.x render knob for inline table styles.
+            #   :STRIKETHROUGH_DOUBLE_TILDE - 2.x always accepts both single and double tilde.
+            when :VALIDATE_UTF8, :LIBERAL_HTML_TAG,
+                 :TABLE_PREFER_STYLE_ATTRIBUTES, :STRIKETHROUGH_DOUBLE_TILDE
+              nil
+            else
+              raise ArgumentError, "unknown commonmarker option: #{opt.inspect}"
+            end
+          end
+
+          legacy_exts.each do |ext|
+            case ext
+            when :strikethrough, :tagfilter, :autolink, :table, :tasklist,
+                 :shortcodes, :footnotes, :multiline_block_quotes,
+                 :math_dollars, :math_code, :wikilinks_title_after_pipe,
+                 :wikilinks_title_before_pipe, :underline, :subscript, :spoiler,
+                 :greentext, :alerts, :description_lists
+              extension_options[ext] = true
+            when :header_ids
+              # header_ids takes a string prefix in 2.x rather than a boolean. The legacy contract
+              # only passed it as a symbol, so use an empty prefix to enable anchor generation.
+              extension_options[:header_ids] = ""
+            else
+              raise ArgumentError, "unknown commonmarker extension: #{ext.inspect}"
+            end
+          end
+
+          # Several extensions (tagfilter, autolink, table, strikethrough, tasklist, shortcodes)
+          # are enabled by default in commonmarker 2.x but were strictly opt-in in 0.x. Explicitly
+          # disable any extension the caller did not request so behavior matches the legacy contract.
+          [:strikethrough, :tagfilter, :autolink, :table, :tasklist, :shortcodes].each do |ext|
+            extension_options[ext] = false unless extension_options[ext]
+          end
+
+          # header_ids is enabled by default in commonmarker 2.x (it injects anchor tags inside
+          # every heading). The legacy 0.x wrapper never enabled it implicitly, so disable it
+          # unless the caller explicitly requested it.
+          extension_options[:header_ids] = nil unless extension_options.key?(:header_ids)
+
+          Commonmarker.to_html(
+            content,
+            options: {
+              parse: parse_options,
+              render: render_options,
+              extension: extension_options,
+            },
+            plugins: {syntax_highlighter: nil},
+          )
         },
         "github/markdown" => proc { |content, options: {}|
           GitHub::Markdown.render(content)