Commit 397c42fa authored by Rene Saarsoo
Browse files

Extract Shortener class from DocFormatter.

No reason at all for it to be part of DocFormatter.
parent 50a03428
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
require 'jsduck/type_parser'
require 'jsduck/logger'
require 'jsduck/meta_tag_registry'
require 'jsduck/shortener'

module JsDuck

@@ -34,8 +35,8 @@ module JsDuck
    def format_member(m)
      @formatter.doc_context = m[:files][0]
      m[:doc] = @formatter.format(m[:doc]) if m[:doc]
      if expandable?(m) || @formatter.too_long?(m[:doc])
        m[:shortDoc] = @formatter.shorten(m[:doc])
      if expandable?(m) || Shortener.too_long?(m[:doc])
        m[:shortDoc] = Shortener.shorten(m[:doc])
      end

      # We don't validate and format CSS var and mixin type definitions
+0 −41
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
require 'rubygems'
require 'strscan'
require 'rdiscount'
@@ -6,17 +5,13 @@ require 'jsduck/logger'
require 'jsduck/inline/link'
require 'jsduck/inline/img'
require 'jsduck/inline/video'
require 'jsduck/util/html'

module JsDuck

  # Formats doc-comments
  class DocFormatter
    # Maximum length for text that doesn't get shortened, defaults to 120
    attr_accessor :max_length

    def initialize(relations={}, opts={})
      @max_length = 120
      @images = []

      @inline_link = Inline::Link.new(opts)
@@ -147,42 +142,6 @@ module JsDuck
      replace(RDiscount.new(input).to_html)
    end

    # Shortens text
    #
    # 116 chars is also where ext-doc makes its cut, but unlike
    # ext-doc we only make the cut when there's more than 120 chars.
    #
    # This way we don't get stupid expansions like:
    #
    #   Blah blah blah some text...
    #
    # expanding to:
    #
    #   Blah blah blah some text.
    #
    def shorten(input)
      # Only the first sentence of the tag-free, trimmed text is used.
      sent = first_sentence(Util::HTML.strip_tags(input).strip)
      # Use u-modifier to correctly count multi-byte characters
      chars = sent.scan(/./mu)
      if chars.length > @max_length
        # Three characters are reserved for the "..." suffix.  The count
        # is clamped at zero, because a negative range end would wrap
        # around and keep (almost) the whole text when max_length < 4.
        keep = [@max_length - 3, 0].max
        chars.first(keep).join + "..."
      else
        # Short enough: keep the sentence intact and mark it with " ...".
        sent + " ..."
      end
    end

    # Returns the leading sentence of +str+: everything up to and
    # including the first "." or "。" that is followed by whitespace.
    # When no such sentence end is found, the whole text is returned.
    def first_sentence(str)
      sentence_then_rest = /\A(.+?(\.|。))\s.*\Z/mu
      str.sub(sentence_then_rest) { Regexp.last_match(1) }
    end

    # Returns true when input should get shortened.
    def too_long?(input)
      plain = Util::HTML.strip_tags(input).strip
      # Text continuing past its first sentence always gets shortened.
      # Both sides are measured the same way, so it does not matter
      # here whether length counts bytes or characters.
      return true if first_sentence(plain).length < plain.length
      # A single sentence is too long only when its character count
      # (u-modifier: multi-byte characters count once) exceeds the limit.
      plain.scan(/./mu).length > @max_length
    end

  end

end
+58 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
require 'jsduck/util/html'
require 'jsduck/util/singleton'

module JsDuck

  # Little helper for shortening text
  class Shortener
    include Util::Singleton

    # Maximum number of characters a text may have without getting
    # truncated.  The accessor is used for testing purposes only.
    attr_accessor :max_length

    # Sets up the default limit of 120 characters.
    def initialize
      @max_length = 120
    end

    # Shortens text
    #
    # Strips HTML tags from the input, takes the first sentence of
    # what remains, and returns it as a String that is either:
    #
    # - the whole sentence followed by " ...", when it has no more
    #   than max_length characters, or
    # - its beginning followed by "...", cut so that the result is
    #   max_length characters long.
    #
    # 116 chars is also where ext-doc makes its cut, but unlike
    # ext-doc we only make the cut when there's more than 120 chars.
    #
    # This way we don't get stupid expansions like:
    #
    #   Blah blah blah some text...
    #
    # expanding to:
    #
    #   Blah blah blah some text.
    #
    def shorten(input)
      sent = first_sentence(Util::HTML.strip_tags(input).strip)
      # Use u-modifier to correctly count multi-byte characters
      chars = sent.scan(/./mu)
      if chars.length > @max_length
        # Three characters are reserved for the "..." suffix.  The count
        # is clamped at zero, because a negative range end would wrap
        # around and keep (almost) the whole text when max_length < 4.
        keep = [@max_length - 3, 0].max
        chars.first(keep).join + "..."
      else
        sent + " ..."
      end
    end

    # Returns the first sentence inside a string: everything up to
    # and including the first "." or "。" that is followed by
    # whitespace.  Returns the whole string when there is no such
    # sentence end.
    def first_sentence(str)
      str.sub(/\A(.+?(\.|。))\s.*\Z/mu, "\\1")
    end

    # Returns true when input should get shortened: either the
    # tag-free text continues past its first sentence, or it has more
    # than max_length characters.
    def too_long?(input)
      stripped = Util::HTML.strip_tags(input).strip
      # Sentence v/s full text: both are measured the same way, so it
      # doesn't matter whether length counts bytes or characters.
      # Full text v/s max: count characters, not bytes.
      first_sentence(stripped).length < stripped.length || stripped.scan(/./mu).length > @max_length
    end

  end

end
+0 −86
Original line number Diff line number Diff line
@@ -466,90 +466,4 @@ describe JsDuck::DocFormatter do

  end

  describe "#shorten" do

    # A small limit keeps the sample texts short.
    before do
      @formatter.max_length = 10
    end

    it "appends ellipsis to short text" do
      @formatter.shorten("Ha ha").should == "Ha ha ..."
    end

    it "shortens text longer than max length" do
      @formatter.shorten("12345678901").should == "1234567..."
    end

    it "counts multi-byte characters correctly when measuring text length" do
      # Text ending with a-umlaut character
      @formatter.shorten("123456789ä").should == "123456789ä ..."
    end

    it "shortens text with multi-byte characters correctly" do
      # Text containing a-umlaut character
      @formatter.shorten("123456ä8901").should == "123456ä..."
    end

    it "strips HTML tags when shortening" do
      @formatter.shorten("<a href='some-long-link'>12345678901</a>").should == "1234567..."
    end

    it "takes only first sentence" do
      @formatter.shorten("bla. blah").should == "bla. ..."
    end
  end

  describe "#too_long?" do

    # Shorthand for asking the formatter under test.
    def too_long?(text)
      @formatter.too_long?(text)
    end

    # A small limit keeps the sample texts short.
    before do
      @formatter.max_length = 10
    end

    it "is false when exactly equal to the max_length" do
      too_long?("1234567890").should == false
    end

    it "is false when short sentence" do
      too_long?("bla bla.").should == false
    end

    it "is true when long sentence" do
      too_long?("bla bla bla.").should == true
    end

    it "ignores HTML tags when calculating text length" do
      too_long?("<a href='some-long-link'>Foo</a>").should == false
    end

    it "counts multi-byte characters correctly" do
      # Text ending with a-umlaut character
      too_long?("123456789ä").should == false
    end
  end


  describe "#first_sentence" do
    it "extracts first sentence" do
      @formatter.first_sentence("Hi John. This is me.").should == "Hi John."
    end
    it "extracts first sentence of multiline text" do
      @formatter.first_sentence("Hi\nJohn.\nThis\nis\nme.").should == "Hi\nJohn."
    end
    it "returns everything if no dots in text" do
      @formatter.first_sentence("Hi John this is me").should == "Hi John this is me"
    end
    it "ignores dots inside words" do
      @formatter.first_sentence("Hi John th.is is me").should == "Hi John th.is is me"
    end
    it "ignores first empty sentence" do
      @formatter.first_sentence(". Hi John. This is me.").should == ". Hi John."
    end
    it "understands chinese/japanese full-stop character as end of sentence" do
      @formatter.first_sentence("Some Chinese Text。 And some more。").should == "Some Chinese Text。"
    end
  end

end

spec/shortener_spec.rb

0 → 100644
+104 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
require "jsduck/shortener"

describe JsDuck::Shortener do

  describe "#shorten" do

    def shorten(text)
      JsDuck::Shortener.shorten(text)
    end

    # The limit is set on the shared Shortener instance, so the
    # previous value is remembered here and put back after each
    # example to keep it from carrying over into later examples.
    before do
      @saved_max_length = JsDuck::Shortener.instance.max_length
      JsDuck::Shortener.instance.max_length = 10
    end

    after do
      JsDuck::Shortener.instance.max_length = @saved_max_length
    end

    it "appends ellipsis to short text" do
      shorten("Ha ha").should == "Ha ha ..."
    end

    it "shortens text longer than max length" do
      shorten("12345678901").should == "1234567..."
    end

    it "counts multi-byte characters correctly when measuring text length" do
      # Text ending with a-umlaut character
      shorten("123456789ä").should == "123456789ä ..."
    end

    it "shortens text with multi-byte characters correctly" do
      # Text containing a-umlaut character
      shorten("123456ä8901").should == "123456ä..."
    end

    it "strips HTML tags when shortening" do
      shorten("<a href='some-long-link'>12345678901</a>").should == "1234567..."
    end

    it "takes only first sentence" do
      shorten("bla. blah").should == "bla. ..."
    end
  end

  describe "#too_long?" do

    def too_long?(text)
      JsDuck::Shortener.too_long?(text)
    end

    # Same save-and-restore of the shared instance's limit as in the
    # "#shorten" examples.
    before do
      @saved_max_length = JsDuck::Shortener.instance.max_length
      JsDuck::Shortener.instance.max_length = 10
    end

    after do
      JsDuck::Shortener.instance.max_length = @saved_max_length
    end

    it "is false when exactly equal to the max_length" do
      too_long?("1234567890").should == false
    end

    it "is false when short sentence" do
      too_long?("bla bla.").should == false
    end

    it "is true when long sentence" do
      too_long?("bla bla bla.").should == true
    end

    it "ignores HTML tags when calculating text length" do
      too_long?("<a href='some-long-link'>Foo</a>").should == false
    end

    it "counts multi-byte characters correctly" do
      # Text ending with a-umlaut character
      too_long?("123456789ä").should == false
    end
  end


  describe "#first_sentence" do
    def first_sentence(text)
      JsDuck::Shortener.first_sentence(text)
    end

    it "extracts first sentence" do
      first_sentence("Hi John. This is me.").should == "Hi John."
    end
    it "extracts first sentence of multiline text" do
      first_sentence("Hi\nJohn.\nThis\nis\nme.").should == "Hi\nJohn."
    end
    it "returns everything if no dots in text" do
      first_sentence("Hi John this is me").should == "Hi John this is me"
    end
    it "ignores dots inside words" do
      first_sentence("Hi John th.is is me").should == "Hi John th.is is me"
    end
    it "ignores first empty sentence" do
      first_sentence(". Hi John. This is me.").should == ". Hi John."
    end
    it "understands chinese/japanese full-stop character as end of sentence" do
      first_sentence("Some Chinese Text。 And some more。").should == "Some Chinese Text。"
    end
  end

end