Extract Shortener class from DocFormatter. (397c42fa) · Commits · extjs / jsduck

lib/jsduck/class_formatter.rb

+3 −2

Original line number	Diff line number	Diff line
		require 'jsduck/type_parser'
		require 'jsduck/logger'
		require 'jsduck/meta_tag_registry'
		require 'jsduck/shortener'

		module JsDuck

		@@ -34,8 +35,8 @@ module JsDuck
		def format_member(m)
		@formatter.doc_context = m[:files][0]
		m[:doc] = @formatter.format(m[:doc]) if m[:doc]
		if expandable?(m) \|\| @formatter.too_long?(m[:doc])
		m[:shortDoc] = @formatter.shorten(m[:doc])
		if expandable?(m) \|\| Shortener.too_long?(m[:doc])
		m[:shortDoc] = Shortener.shorten(m[:doc])
		end

		# We don't validate and format CSS var and mixin type definitions

lib/jsduck/doc_formatter.rb

+0 −41

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		require 'rubygems'
		require 'strscan'
		require 'rdiscount'
		@@ -6,17 +5,13 @@ require 'jsduck/logger'
		require 'jsduck/inline/link'
		require 'jsduck/inline/img'
		require 'jsduck/inline/video'
		require 'jsduck/util/html'

		module JsDuck

		# Formats doc-comments
		class DocFormatter
		# Maximum length for text that doesn't get shortened, defaults to 120
		attr_accessor :max_length

		def initialize(relations={}, opts={})
		@max_length = 120
		@images = []

		@inline_link = Inline::Link.new(opts)
		@@ -147,42 +142,6 @@ module JsDuck
		replace(RDiscount.new(input).to_html)
		end

		# Shortens text
		#
		# 116 chars is also where ext-doc makes its cut, but unlike
		# ext-doc we only make the cut when there's more than 120 chars.
		#
		# This way we don't get stupid expansions like:
		#
		# Blah blah blah some text...
		#
		# expanding to:
		#
		# Blah blah blah some text.
		#
		def shorten(input)
		  # Work on plain text only: drop markup, trim whitespace, keep the first sentence.
		  sentence = first_sentence(Util::HTML.strip_tags(input).strip)
		  # Split with the u-modifier so every multi-byte character counts once.
		  letters = sentence.scan(/./mu)
		  # A sentence within the limit is kept whole and only gets an ellipsis marker.
		  return sentence + " ..." unless letters.length > @max_length
		  # Otherwise cut it, leaving room for the three dots appended here.
		  letters[0..(@max_length - 4)].join + "..."
		end

		# Returns the first sentence of +str+.
		#
		# A sentence ends at the first "." or "。" (ideographic full stop) that
		# has at least one character before it and whitespace right after it.
		# When no such terminator exists the text is returned unchanged.
		def first_sentence(str)
		  # m: let "." span newlines; u: treat the pattern as UTF-8.
		  # The terminator has to be a real alternation ("." or "。"); an escaped
		  # pipe would demand a literal "|" and no sentence would ever be found.
		  str.sub(/\A(.+?(\.|。))\s.*\Z/mu, "\\1")
		end

		# Returns true when input should get shortened.
		def too_long?(input)
		  # Measure the visible text only: markup and surrounding whitespace don't count.
		  stripped = Util::HTML.strip_tags(input).strip
		  # Shorten when the text continues past its first sentence ...
		  return true if first_sentence(stripped).length < stripped.length
		  # ... or when it has more characters than allowed. Scanning with the
		  # u-modifier counts each multi-byte character once.
		  stripped.scan(/./mu).length > @max_length
		end

		end

		end

lib/jsduck/shortener.rb

0 → 100644

+58 −0

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		require 'jsduck/util/html'
		require 'jsduck/util/singleton'

		module JsDuck

		# Little helper for shortening text
		class Shortener
		  # NOTE(review): Util::Singleton is defined outside this file. Callers use
		  # Shortener.shorten(...) and Shortener.instance, so it presumably forwards
		  # class-level calls to one shared instance - confirm.
		  include Util::Singleton

		  # Maximum length for text that doesn't get shortened.
		  # The accessor is used for testing purposes only.
		  attr_accessor :max_length

		  # Starts out with the default limit of 120 characters.
		  def initialize
		    @max_length = 120
		  end

		  # Shortens text to its first sentence, cutting that sentence when it
		  # has more than max_length characters.  HTML tags are stripped first.
		  #
		  # 116 chars is also where ext-doc makes its cut, but unlike
		  # ext-doc we only make the cut when there's more than 120 chars.
		  #
		  # This way we don't get stupid expansions like:
		  #
		  #     Blah blah blah some text...
		  #
		  # expanding to:
		  #
		  #     Blah blah blah some text.
		  #
		  # Returns the cut text followed by "...", or the whole first
		  # sentence followed by " ..." when no cut was needed.
		  def shorten(input)
		    sent = first_sentence(Util::HTML.strip_tags(input).strip)
		    # Use u-modifier to correctly count multi-byte characters
		    chars = sent.scan(/./mu)
		    if chars.length > @max_length
		      # Leave room for the three dots appended to the cut text.
		      chars[0..(@max_length-4)].join + "..."
		    else
		      sent + " ..."
		    end
		  end

		  # Returns the first sentence inside a string.
		  #
		  # A sentence ends at the first "." or "。" (ideographic full stop) that
		  # has at least one character before it and whitespace right after it.
		  # When no such terminator exists the string is returned unchanged.
		  def first_sentence(str)
		    # The terminator must be a real alternation ("." or "。"); an escaped
		    # pipe would demand a literal "|" and no sentence would ever be found.
		    str.sub(/\A(.+?(\.|。))\s.*\Z/mu, "\\1")
		  end

		  # Returns true when input should get shortened.
		  def too_long?(input)
		    stripped = Util::HTML.strip_tags(input).strip
		    # Too long when the text continues past its first sentence, or when
		    # it has more characters than max_length (the u-modifier scan counts
		    # each multi-byte character once).
		    first_sentence(stripped).length < stripped.length || stripped.scan(/./mu).length > @max_length
		  end

		end

		end

spec/doc_formatter_spec.rb

+0 −86

Original line number	Diff line number	Diff line
		@@ -466,90 +466,4 @@ describe JsDuck::DocFormatter do

		end

		describe "#shorten" do

		  # A small limit keeps the example strings short.
		  before do
		    @formatter.max_length = 10
		  end

		  it "appends ellipsis to short text" do
		    @formatter.shorten("Ha ha").should == "Ha ha ..."
		  end

		  it "shortens text longer than max length" do
		    @formatter.shorten("12345678901").should == "1234567..."
		  end

		  it "counts multi-byte characters correctly when measuring text length" do
		    # Text ending with a-umlaut character
		    @formatter.shorten("123456789ä").should == "123456789ä ..."
		  end

		  it "shortens text with multi-byte characters correctly" do
		    # Text containing a-umlaut character
		    @formatter.shorten("123456ä8901").should == "123456ä..."
		  end

		  it "strips HTML tags when shortening" do
		    @formatter.shorten("<a href='some-long-link'>12345678901</a>").should == "1234567..."
		  end

		  it "takes only first sentence" do
		    @formatter.shorten("bla. blah").should == "bla. ..."
		  end
		end

		describe "#too_long?" do

		  before { @formatter.max_length = 10 }

		  # Each row: example description, input text, expected verdict.
		  [
		    ["is false when exactly equal to the max_length", "1234567890", false],
		    ["is false when short sentence", "bla bla.", false],
		    ["is true when long sentence", "bla bla bla.", true],
		    ["ignores HTML tags when calculating text length", "<a href='some-long-link'>Foo</a>", false],
		    # Text ending with a-umlaut character
		    ["counts multi-byte characters correctly", "123456789ä", false]
		  ].each do |title, text, verdict|
		    it title do
		      @formatter.too_long?(text).should == verdict
		    end
		  end
		end


		describe "#first_sentence" do
		  it "extracts first sentence" do
		    @formatter.first_sentence("Hi John. This is me.").should == "Hi John."
		  end
		  it "extracts first sentence of multiline text" do
		    @formatter.first_sentence("Hi\nJohn.\nThis\nis\nme.").should == "Hi\nJohn."
		  end
		  # This example used to appear twice verbatim; one copy is enough.
		  it "returns everything if no dots in text" do
		    @formatter.first_sentence("Hi John this is me").should == "Hi John this is me"
		  end
		  it "ignores dots inside words" do
		    @formatter.first_sentence("Hi John th.is is me").should == "Hi John th.is is me"
		  end
		  it "ignores first empty sentence" do
		    @formatter.first_sentence(". Hi John. This is me.").should == ". Hi John."
		  end
		  it "understands chinese/japanese full-stop character as end of sentence" do
		    @formatter.first_sentence("Some Chinese Text。 And some more。").should == "Some Chinese Text。"
		  end
		end

		end

spec/shortener_spec.rb

0 → 100644

+104 −0

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		require "jsduck/shortener"

		describe JsDuck::Shortener do

		  # The shortener is reached through one shared instance, so a limit set
		  # here would otherwise stay in effect for every spec that runs later.
		  # Remember the current value, use a small limit, and put it back after.
		  before do
		    @original_max_length = JsDuck::Shortener.instance.max_length
		    JsDuck::Shortener.instance.max_length = 10
		  end

		  after do
		    JsDuck::Shortener.instance.max_length = @original_max_length
		  end

		  describe "#shorten" do

		    # Shorthand for calling the method under test.
		    def shorten(text)
		      JsDuck::Shortener.shorten(text)
		    end

		    it "appends ellipsis to short text" do
		      shorten("Ha ha").should == "Ha ha ..."
		    end

		    it "shortens text longer than max length" do
		      shorten("12345678901").should == "1234567..."
		    end

		    it "counts multi-byte characters correctly when measuring text length" do
		      # Text ending with a-umlaut character
		      shorten("123456789ä").should == "123456789ä ..."
		    end

		    it "shortens text with multi-byte characters correctly" do
		      # Text containing a-umlaut character
		      shorten("123456ä8901").should == "123456ä..."
		    end

		    it "strips HTML tags when shortening" do
		      shorten("<a href='some-long-link'>12345678901</a>").should == "1234567..."
		    end

		    it "takes only first sentence" do
		      shorten("bla. blah").should == "bla. ..."
		    end
		  end

		  describe "#too_long?" do

		    # Shorthand for calling the method under test.
		    def too_long?(text)
		      JsDuck::Shortener.too_long?(text)
		    end

		    it "is false when exactly equal to the max_length" do
		      too_long?("1234567890").should == false
		    end

		    it "is false when short sentence" do
		      too_long?("bla bla.").should == false
		    end

		    it "is true when long sentence" do
		      too_long?("bla bla bla.").should == true
		    end

		    it "ignores HTML tags when calculating text length" do
		      too_long?("<a href='some-long-link'>Foo</a>").should == false
		    end

		    it "counts multi-byte characters correctly" do
		      # Text ending with a-umlaut character
		      too_long?("123456789ä").should == false
		    end
		  end


		  describe "#first_sentence" do
		    # Shorthand for calling the method under test.
		    def first_sentence(text)
		      JsDuck::Shortener.first_sentence(text)
		    end

		    it "extracts first sentence" do
		      first_sentence("Hi John. This is me.").should == "Hi John."
		    end
		    it "extracts first sentence of multiline text" do
		      first_sentence("Hi\nJohn.\nThis\nis\nme.").should == "Hi\nJohn."
		    end
		    # This example used to appear twice verbatim; one copy is enough.
		    it "returns everything if no dots in text" do
		      first_sentence("Hi John this is me").should == "Hi John this is me"
		    end
		    it "ignores dots inside words" do
		      first_sentence("Hi John th.is is me").should == "Hi John th.is is me"
		    end
		    it "ignores first empty sentence" do
		      first_sentence(". Hi John. This is me.").should == ". Hi John."
		    end
		    it "understands chinese/japanese full-stop character as end of sentence" do
		      first_sentence("Some Chinese Text。 And some more。").should == "Some Chinese Text。"
		    end
		  end

		end