From 44e73fc8745f6c68f10b8014755ae5d16e887958 Mon Sep 17 00:00:00 2001
From: Rene Saarsoo <nene@triin.net>
Date: Tue, 17 Sep 2013 14:54:14 +0300
Subject: [PATCH] Clean up unused files from .cache directory.

We remember each cache entry that was read or written during the
current jsduck run.  All others get deleted.  This ensures the .cache
directory doesn't grow over time.

Refs #446
---
 lib/jsduck/batch_parser.rb | 14 ++++++++++++--
 lib/jsduck/cache.rb        | 28 +++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/lib/jsduck/batch_parser.rb b/lib/jsduck/batch_parser.rb
index 8abe6677..f229bd56 100644
--- a/lib/jsduck/batch_parser.rb
+++ b/lib/jsduck/batch_parser.rb
@@ -14,21 +14,31 @@ module JsDuck
     def self.parse(opts)
       cache = Cache.create(opts)
 
-      Util::Parallel.map(opts.input_files) do |fname|
+      results = Util::Parallel.map(opts.input_files) do |fname|
         Logger.log("Parsing", fname)
+
         begin
           source = Util::IO.read(fname)
           docs = nil
+          # Use the cached docs for this source when present; otherwise parse and cache.
           unless docs = cache.read(source)
             docs = Parser.new.parse(source, fname, opts)
             cache.write(source, docs)
           end
-          Source::File.new(source, docs, fname)
+          # Pair the parsed file with the cache entry it just read or wrote.
+          {
+            :file => Source::File.new(source, docs, fname),
+            :cache => cache.previous_entry,
+          }
         rescue
           Logger.fatal_backtrace("Error while parsing #{fname}", $!)
           exit(1)
         end
       end
+      # Drop nil entries, then let the cache delete every file not used in this run.
+      cache.cleanup( results.map {|r| r[:cache] }.compact )
+
+      return results.map {|r| r[:file] }
     end
 
   end
diff --git a/lib/jsduck/cache.rb b/lib/jsduck/cache.rb
index cb563daf..47127f9c 100644
--- a/lib/jsduck/cache.rb
+++ b/lib/jsduck/cache.rb
@@ -1,6 +1,7 @@
 require 'digest/md5'
 require 'fileutils'
 require 'jsduck/util/null_object'
+require 'set'
 
 module JsDuck
 
@@ -15,12 +16,23 @@ module JsDuck
       if opts.cache && opts.cache_dir
         Cache.new(opts.cache_dir)
       else
-        Util::NullObject.new(:read => nil, :write => nil)
+        Util::NullObject.new(
+          :read => nil,
+          :write => nil,
+          :previous_entry => nil,
+          :cleanup => nil
+          )
       end
     end
 
+    # The name of the cache file used by the most recent #read or #write
+    # call.  It is nil when that #read did not find a cache file, and it
+    # is always set right after a #write call.
+    attr_reader :previous_entry
+
     def initialize(cache_dir)
       @cache_dir = cache_dir
+      @previous_entry = nil # nothing has been read or written yet
       FileUtils.mkdir_p(cache_dir) unless File.exists?(cache_dir)
     end
 
@@ -29,8 +41,10 @@ module JsDuck
     def read(file_contents)
       fname = file_name(file_contents)
       if File.exists?(fname)
+        @previous_entry = fname # cache hit: record which entry was used
         File.open(fname, "rb") {|file| Marshal::load(file) }
       else
+        @previous_entry = nil # cache miss: no entry was used by this call
         nil
       end
     end
@@ -39,9 +53,21 @@ module JsDuck
     # contents of a source file.
     def write(file_contents, data)
       fname = file_name(file_contents)
+      @previous_entry = fname # record the entry that is about to be written
       File.open(fname, "wb") {|file| Marshal::dump(data, file) }
     end
 
+    # Given listing of used cache files (those that were either read
+    # or written during this jsduck run) removes rest of the files
+    # from cache directory that were unused.
+    def cleanup(used_cache_entries)
+      used = Set.new(used_cache_entries)
+
+      Dir[@cache_dir + "/*.dat"].each do |file|
+        FileUtils.rm_rf(file) unless used.include?(file)
+      end
+    end
+
     private
 
     def file_name(file_contents)
-- 
GitLab