Skip to content

Commit

Permalink
Merge pull request #1114 from PRX/fix/summary_format
Browse files Browse the repository at this point in the history
Deprecate summary and content attributes, and fix description, summary, content text formats in the RSS feeds
  • Loading branch information
kookster authored Oct 7, 2024
2 parents c77a815 + 29faba3 commit ae8f93e
Show file tree
Hide file tree
Showing 24 changed files with 61 additions and 66 deletions.
2 changes: 0 additions & 2 deletions app/controllers/episodes_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ def episode_params
:title,
:clean_title,
:subtitle,
:summary,
:content,
:description,
:production_notes,
:explicit_option,
Expand Down
1 change: 0 additions & 1 deletion app/controllers/feeds_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def nilified_feed_params
:file_name,
:title,
:subtitle,
:summary,
:description,
:include_donation_url,
:include_podcast_value,
Expand Down
1 change: 0 additions & 1 deletion app/controllers/podcasts_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ def podcast_params
default_feed_attributes: [
:id,
:subtitle,
:summary,
:description,
itunes_category: [],
itunes_subcategory: [],
Expand Down
8 changes: 2 additions & 6 deletions app/helpers/podcasts_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,11 @@ def first_nonblank(type, items)
end

def show_itunes_summary?(model)
model.summary.present? || model.description.present?
model.description.present?
end

def itunes_summary(model)
if model.summary.present?
model.summary
else
sanitize_links_only(model.description || "")
end
sanitize_links_only(model.description || "")
end

def podcast_settings_active?
Expand Down
6 changes: 3 additions & 3 deletions app/models/concerns/import_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
require "prx_access"
require "net/http"
require "uri"
require "text_sanitizer"

module ImportUtils
include TextSanitizer
extend ActiveSupport::Concern

class HttpError < StandardError
Expand Down Expand Up @@ -110,9 +112,7 @@ def remove_podcastchoices_link(str)
end

def sanitize_html(text)
return nil if text.blank?
sanitizer = Rails::Html::WhiteListSanitizer.new
sanitizer.sanitize(Loofah.fragment(text).scrub!(:prune).to_s).strip
sanitize_white_list(text)
end

def files_match?(file, url)
Expand Down
28 changes: 25 additions & 3 deletions app/models/concerns/text_sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,25 @@ module TextSanitizer
def sanitize_white_list(text)
return nil if text.blank?
sanitizer = Rails::Html::WhiteListSanitizer.new
sanitizer.sanitize(Loofah.fragment(text).scrub!(:prune).to_s)
text = sanitizer.sanitize(Loofah.fragment(text).scrub!(:prune).to_s)
clean_whitespace(text)
end

def sanitize_links_only(text)
return nil if text.blank?
scrubber = Rails::Html::PermitScrubber.new
scrubber.tags = %w[a]
scrubber.attributes = %w[href target nofollow]
Loofah.fragment(text).scrub!(:prune).scrub!(scrubber).to_s
text = add_newlines_to_tags(text)
text = Loofah.fragment(text).scrub!(:prune).scrub!(scrubber).to_s
clean_whitespace(text)
end

def sanitize_text_only(text)
return nil if text.blank?
Loofah.fragment(text).scrub!(:prune).text(encode_special_chars: false)
text = add_newlines_to_tags(text)
text = Loofah.fragment(text).scrub!(:prune).text(encode_special_chars: false)
clean_whitespace(text)
end

def sanitize_categories(kws, strict)
Expand All @@ -37,4 +42,21 @@ def sanitize_category(kw, max_length, strict)
kw.strip.slice(0, max_length)
end
end

def add_newlines_to_tags(text)
text.gsub(/<p>/i, "\n<p>")
.gsub(/<\/p>/i, "</p>\n")
.gsub(/<div>/i, "\n<div>")
.gsub(/<\/div>/i, "</div>\n")
.gsub(/<br>/i, "\n<br>")
.gsub(/<br\s*\/>/i, "\n<br>")
end

def clean_whitespace(text)
text.gsub(/\R/, "\n") # if there is any kind of newline, make it \n
.gsub(/([ \t]*)\n([ \t]*)/, "\n") # get rid of whitespace on eother side of a newline
.gsub(/\n\n\n/, "\n\n") # no more than 2 newlines together
.squeeze(" ") # make all groups of spaces into a siingle space
.strip # get rid of leading or trailing spaces
end
end
2 changes: 0 additions & 2 deletions app/models/episode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,7 @@ def set_external_keyword

def sanitize_text
self.description = sanitize_white_list(description) if description_changed?
self.content = sanitize_white_list(content) if content_changed?
self.subtitle = sanitize_text_only(subtitle) if subtitle_changed?
self.summary = sanitize_links_only(summary) if summary_changed?
self.title = sanitize_text_only(title) if title_changed?
self.original_guid = original_guid.strip if !original_guid.blank? && original_guid_changed?
end
Expand Down
1 change: 0 additions & 1 deletion app/models/feed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def set_defaults
def sanitize_text
self.description = sanitize_white_list(description) if description_changed?
self.subtitle = sanitize_text_only(subtitle) if subtitle_changed?
self.summary = sanitize_links_only(summary) if summary_changed?
self.title = sanitize_text_only(title) if title_changed?
end

Expand Down
5 changes: 1 addition & 4 deletions app/models/imports/episode_rss_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ def update_episode_with_entry!
episode.subtitle = clean_string(episode_short_desc(entry))
episode.title = clean_title(entry[:title])

if entry[:itunes_summary] && entry_description_attribute(entry) != :itunes_summary
episode.summary = clean_text(entry[:itunes_summary])
end
episode.author = person(entry[:itunes_author] || entry[:author] || entry[:creator])
episode.block = (clean_string(entry[:itunes_block]) == "yes")
episode.explicit = explicit(entry[:itunes_explicit])
Expand All @@ -110,7 +107,7 @@ def entry_description(entry)
end

def entry_description_attribute(entry)
[:content, :itunes_summary, :description, :title].find { |d| !entry[d].blank? }
[:content, :description, :itunes_summary, :itunes_subtitle, :title].find { |d| !entry[d].blank? }
end

def title
Expand Down
3 changes: 1 addition & 2 deletions app/models/imports/podcast_rss_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def create_or_update_episode_imports!
end

def feed_description
result = [channel[:itunes_summary], channel[:description]].find { |d| !d.blank? }
result = [channel[:description], channel[:itunes_summary]].find { |d| !d.blank? }
clean_text(result)
end

Expand Down Expand Up @@ -185,7 +185,6 @@ def build_podcast_attributes
podcast_attributes[atr.to_sym] = clean_string(channel[atr])
end

podcast_attributes[:summary] = clean_text(channel[:itunes_summary])
podcast_attributes[:link] = clean_string(channel[:url])
podcast_attributes[:explicit] = explicit(channel[:itunes_explicit], "false")
podcast_attributes[:new_feed_url] = clean_string(channel[:itunes_new_feed_url])
Expand Down
2 changes: 1 addition & 1 deletion app/models/podcast.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require "text_sanitizer"

class Podcast < ApplicationRecord
FEED_ATTRS = %i[subtitle description summary url new_feed_url display_episodes_count
FEED_ATTRS = %i[subtitle description url new_feed_url display_episodes_count
display_full_episodes_count enclosure_prefix enclosure_template feed_image itunes_image
ready_feed_image ready_itunes_image ready_image itunes_category itunes_subcategory itunes_categories]
FEED_GETTERS = FEED_ATTRS.map { |s| [s, "#{s}_was".to_sym, "#{s}_changed?".to_sym] }.flatten
Expand Down
1 change: 0 additions & 1 deletion app/representers/api/auth/feed_representer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class Api::Auth::FeedRepresenter < Api::BaseRepresenter
property :title
property :subtitle
property :description
property :summary
property :url
property :new_feed_url
property :enclosure_prefix
Expand Down
2 changes: 0 additions & 2 deletions app/representers/api/episode_representer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ class Api::EpisodeRepresenter < Api::BaseRepresenter
property :clean_title
property :subtitle
property :description
property :content
property :summary
property :season_number
property :episode_number
property :itunes_type
Expand Down
1 change: 0 additions & 1 deletion app/representers/api/podcast_representer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class Api::PodcastRepresenter < Api::BaseRepresenter
property :title
property :subtitle
property :description
property :summary
property :itunes_block

property :explicit
Expand Down
2 changes: 0 additions & 2 deletions app/views/episodes/_form_main.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
</div>
<div class="card-body">
<div class="form-floating">
<%= form.hidden_field :summary, value: nil %>
<%= form.hidden_field :content, value: nil %>
<%= form.trix_editor :description %>
</div>
</div>
Expand Down
1 change: 0 additions & 1 deletion app/views/feeds/_form_overrides.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

<div class="col-12 mb-4">
<div class="form-floating input-group">
<%= form.hidden_field :summary, value: nil %>
<%= form.trix_editor :description %>
<%= form.label :description %>
<%= field_help_text t(".help.description") %>
Expand Down
1 change: 0 additions & 1 deletion app/views/podcasts/_form_main.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
<div class="card-body">
<div class="form-floating">
<%= form.fields_for :default_feed do |fields| %>
<%= fields.hidden_field :summary, value: nil %>
<%= fields.trix_editor :description %>
<% end %>
</div>
Expand Down
2 changes: 1 addition & 1 deletion app/views/podcasts/show.rss.builder
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ xml.rss "xmlns:atom" => "http://www.w3.org/2005/Atom",
url: ep.enclosure_url(@feed))
end

xml.content(:encoded) { xml.cdata!(ep.content) } unless ep.content.blank?
xml.content(:encoded) { xml.cdata!(episode_description(ep)) }
end
end
end
Expand Down
8 changes: 0 additions & 8 deletions test/factories/episode_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@
"<div><a href=\"/tina\">Tina</a> McElroy Ansa is a little girl when her father's business goes under.</div>"
end

content do
"<div><a href=\"/tina\">Tina</a> McElroy Ansa is a little girl when her father's business goes under.</div>"
end

summary do
"<a href=\"/tina\">Tina</a> McElroy Ansa is a little girl when her father's business goes under"
end

factory :episode_with_media do
audio_version { "One segment audio" }
segment_count { 1 }
Expand Down
1 change: 0 additions & 1 deletion test/factories/feed_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
title { "new feed" }
subtitle { "Goofy laughsters" }
description { "A goofy fun-time laughcast with doofuses" }
summary { "Public radio host Jesse Thorn and @midnight writer Jordan Morris goof around" }

url { "http://feeds.feedburner.com/thornmorris" }
new_feed_url { "http://feeds.feedburner.com/newthornmorris" }
Expand Down
27 changes: 22 additions & 5 deletions test/models/concerns/text_sanitizer_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,37 @@ class TestSanitizer
describe TextSanitizer do
let(:model) { TestSanitizer.new }
let(:text) do
'<!-- Sorry! --><p>my</p> <b>dog & cat</b> <a href="/">ate</a> ' \
"<span><div>my</div></span> <script>homework</script>"
"<!-- Sorry! --><p>my</p>" \
" <b>dog & cat</b> <a href=\"/\">ate</a><span>" \
"<div>my</div>" \
"</span><script>homework</script>"
end

it "scrubs all tags" do
assert_equal model.sanitize_text_only(text), "my dog & cat ate my "
assert_equal model.sanitize_text_only(text), "my\ndog & cat ate\nmy"
end

it "leaves ampersands alone" do
assert_equal model.sanitize_text_only("Us & Them"), "Us & Them"
end

it "scrubs all but links" do
r = 'my dog &amp; cat <a href="/">ate</a> my '
r = "my\ndog &amp; cat <a href=\"/\">ate</a>\nmy"
assert_equal model.sanitize_links_only(text), r
end

it "adds space near some tags" do
t = "0<p>1</p>2<br>3<br />4<br/>5<div>6</div>7"
assert_equal model.add_newlines_to_tags(t), "0\n<p>1</p>\n2\n<br>3\n<br>4\n<br>5\n<div>6</div>\n7"
end

it "adds space for removed tags" do
t = " 0<p>1</p>2<br>3<br />4<br/>5<div> 6 </div>7 "
assert_equal model.sanitize_links_only(t), "0\n1\n2\n3\n4\n5\n6\n7"
end

it "scrubs all but white listed" do
r = '<p>my</p> <b>dog &amp; cat</b> <a href="/">ate</a> <span><div>my</div></span> '
r = '<p>my</p> <b>dog &amp; cat</b> <a href="/">ate</a><span><div>my</div></span>'
assert_equal model.sanitize_white_list(text), r
end

Expand All @@ -34,4 +46,9 @@ class TestSanitizer
"<caption></caption>\n</tbody>\n<tfoot></tfoot>\n</table>"
assert_equal model.sanitize_white_list(text), text
end

it "cleans up white space" do
text = " text, \r \n \r more\n \ntext, and \r words\n \n \r\r \n"
assert_equal model.clean_whitespace(text), "text,\n\nmore\n\ntext, and\nwords"
end
end
8 changes: 3 additions & 5 deletions test/models/imports/podcast_rss_import_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,9 @@
_(importer.podcast.title).must_equal "Transistor"
_(importer.podcast.subtitle).must_equal "A podcast of scientific questions and " \
"stories featuring guest hosts and reporters."
_(importer.podcast.description).must_equal "Transistor is a podcast of scientific curiosities " \
"and current events, featuring guest hosts, " \
"scientists, and story-driven reporters. Presented " \
"by radio and podcast powerhouse PRX, with support " \
"from the Sloan Foundation."
_(importer.podcast.description).must_equal "A podcast of scientific questions and stories," \
" with many episodes hosted by key scientists" \
" at the forefront of discovery."

_(importer.podcast.url).must_equal "http://feeds.prx.org/transistor_stem"
_(importer.podcast.new_feed_url).must_equal "http://feeds.prx.org/transistor_stem"
Expand Down
1 change: 0 additions & 1 deletion test/representers/api/auth/feed_representer_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
_(json["slug"]).must_match(/myfeed(\d+)/)
_(json["subtitle"]).must_equal feed.subtitle
_(json["description"]).must_equal feed.description
_(json["summary"]).must_equal feed.summary
end

it "has links" do
Expand Down
13 changes: 2 additions & 11 deletions test/representers/api/episode_representer_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

it "includes basic properties" do
assert_match(%r{/api/v1/stories/}, json["prxUri"])
assert_match(%r{<a href="/tina">Tina</a>}, json["summary"])
assert_match(%r{<a href="/tina">Tina</a>}, json["description"])
end

it "includes clean title, season, episode, and ep type info" do
Expand All @@ -30,17 +30,8 @@
assert_equal json["explicitContent"], true
end

it "uses summary when not blank" do
episode.summary = 'summary has <a href="/">a link</a>'
it "uses sanitized description" do
episode.description = '<b>tags</b> removed, <a href="/">links remain</a>'
assert_equal json["summary"], episode.summary
assert_equal json["description"], episode.description
end

it "uses sanitized description for nil summary" do
episode.summary = nil
episode.description = '<b>tags</b> removed, <a href="/">links remain</a>'
assert_nil json["summary"]
assert_equal json["description"], episode.description
end

Expand Down

0 comments on commit ae8f93e

Please sign in to comment.