/*
 * Copyright (c) 2007 Gerhard Beck.  All rights reserved.
 *
 * Subject to the GNU GENERAL PUBLIC LICENSE,
 * Version 3, 29 June 2007 http://www.gnu.org/licenses/gpl.html
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL GERHARD BECK OR
 * OTHER CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

package org.gerhardb.jibs.optimizer;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Date;

import org.gerhardb.jibs.Jibs;
import org.gerhardb.jibs.util.FileLoopHelper;
import org.gerhardb.lib.io.EzLogger;
import org.gerhardb.lib.util.Killer;

/**
 * Deletes files from a "working" list when their content is byte-for-byte
 * identical to another file.
 * <p>
 * Two passes are offered: {@link #dedupStable()} compares every file found
 * directly inside the stable directories against the working list, and
 * {@link #dedupWorking()} compares the working list against itself.  In both
 * cases only entries of the working list are ever deleted.
 * <p>
 * Candidates are narrowed cheaply before any full read: first by file length,
 * then by a sample of the leading bytes, and only then by a streamed
 * comparison of the complete content.
 */
public class Deduper
{
   /** Number of leading bytes compared before a full comparison is attempted. */
   private static final int SAMPLE_SIZE = 1024;

   /** Size of the per-file buffer used while streaming a full comparison. */
   private static final int COMPARE_BUFFER_SIZE = 8192;

   // Passed In
   FileLoopHelper myHelper;
   File[] myWorkingFiles;
   File[] myStableDirs;
   Killer myKiller;

   // Computed
   // Parallel to myWorkingFiles; an entry of 0 marks a file that is empty,
   // missing, or already deleted and must never be matched.
   long[] myWorkingLengths = null;

   // Always start with these values
   int myMaxSameSize = 0;
   int myDuplicate = 0;
   int myDuplicateNot = 0;
   int mySampleHelped = 0;
   // Accumulated as (file length / 1024) per deleted file.
   // NOTE(review): the value is divided by 1024 again when it is logged, so
   // the logged figure is effectively megabytes - confirm the labels
   // "Deduper.10" / "Deduper.14" agree with that.
   long myKiloBytesDeleted = 0;

   //===========================================================================
   //                              Constructors
   //===========================================================================
   /**
    * Gets Deduper Ready.
    * Information that does not change even if only deduping within
    * directories.
    *
    * @param helper progress/log helper; switched to determinate mode here
    * @param stableDirs directories whose files are kept and compared against
    *                   the working list
    * @param killer polled during the loops to allow an early stop
    */
   Deduper(FileLoopHelper helper, File[] stableDirs, Killer killer)
   {
      this.myHelper = helper;
      this.myStableDirs = stableDirs;
      this.myKiller = killer;
      this.myHelper.setIndeterminate(false);
   }

   //===========================================================================
   //                         Quiz Methods
   //===========================================================================
   /**
    * @return number of working files identified as duplicates so far
    */
   int getRemovedCount()
   {
      return this.myDuplicate;
   }

   /**
    * @return the largest number of same-length working files found for any
    *         single file checked so far
    */
   int getMaxSameSize()
   {
      return this.myMaxSameSize;
   }

   //===========================================================================
   //                          Real Work Functions
   //===========================================================================

   /**
    * Installs the list of files that may be deleted and caches their lengths.
    *
    * @param workingFiles candidate files; may be null or empty
    * @return false if there is nothing to do, true otherwise
    */
   boolean setWorking(File[] workingFiles)
   {
      this.myWorkingFiles = workingFiles;
      // Forget lengths from any previous list so stale entries can never be
      // matched against the new one.
      this.myWorkingLengths = null;

      // Short circuit if there is nothing to do.
      if (workingFiles == null || workingFiles.length == 0)
      {
         this.myHelper.getLog().logLine("     " + Jibs.getString("Deduper.0")); //$NON-NLS-1$ //$NON-NLS-2$
         return false;
      }

      // Let's find out how long each file is.
      // We only compare files of the same length.
      long[] lengths = new long[workingFiles.length];
      for (int i = 0; i < workingFiles.length; i++)
      {
         File candidate = workingFiles[i];
         // A null entry gets length 0, which is never matched.
         lengths[i] = (candidate == null) ? 0 : candidate.length();
      }
      this.myWorkingLengths = lengths;

      return true;
   }

   /**
    * Compares every plain file found directly in each stable directory
    * against the entire working list, deleting working files that are
    * duplicates, then logs timing and counters.
    */
   void dedupStable()
   {
      long stableFileCount = 0;
      Date startTime = new Date();
      this.myHelper.getLog().logLine(EzLogger.DIVIDER);

      File[] stableDirs =
         (this.myStableDirs == null) ? new File[0] : this.myStableDirs;

      this.myHelper.setValue(0);
      this.myHelper.setMaximum(stableDirs.length - 1);
      NoDirectories noDirectories = new NoDirectories();
      for (int i = 0; i < stableDirs.length; i++)
      {
         if (this.myKiller.die())
         {
            break;
         }
         this.myHelper.setValue(i);
         String progress =
            Jibs.getString("Deduper.1") + i + ": " + stableDirs[i]; //$NON-NLS-1$ //$NON-NLS-2$
         this.myHelper.getLabel().setText(progress);
         this.myHelper.getLog().logLine(progress);

         // listFiles returns null when the path is not a directory or an
         // I/O error occurs; such entries are simply skipped.
         File[] stableFiles =
            (stableDirs[i] == null) ? null : stableDirs[i].listFiles(noDirectories);
         if (stableFiles == null)
         {
            continue;
         }

         for (int j = 0; j < stableFiles.length; j++)
         {
            // Stop promptly instead of sampling every remaining stable file.
            if (this.myKiller.die())
            {
               break;
            }
            // Always start at zero to scan the entire working list.
            // That is, check each stable file against the entire
            // working list.
            dedup(stableFiles[j], stableFiles[j].length(), 0);
         }
         stableFileCount = stableFileCount + stableFiles.length;
      }

      Date stopTime = new Date();
      int workingCount =
         (this.myWorkingFiles == null) ? 0 : this.myWorkingFiles.length;
      this.myHelper.getLog().logLine(EzLogger.DIVIDER);
      this.myHelper.getLog().logTime(Jibs.getString("Deduper.3"), startTime, stopTime); //$NON-NLS-1$
      DecimalFormat formatter = new DecimalFormat();
      this.myHelper.getLog().logLine(Jibs.getString("Deduper.4") + " " + formatter.format(stableFileCount) //$NON-NLS-1$ //$NON-NLS-2$
         + " " + Jibs.getString("Deduper.5") + " " //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
         + formatter.format(workingCount) + " " + Jibs.getString("Deduper.6")); //$NON-NLS-1$ //$NON-NLS-2$
      this.myHelper.getLog().logLine(Jibs.getString("Deduper.7") + Jibs.getString("colon") + " " + formatter.format(this.mySampleHelped) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
         + " " + Jibs.getString("Deduper.8") + Jibs.getString("colon") + " " + formatter.format(this.myDuplicateNot) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
         + " " + Jibs.getString("Deduper.9") + Jibs.getString("colon") + " " + formatter.format(this.myDuplicate)); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
      this.myHelper.getLog().logLine(Jibs.getString("Deduper.10") + Jibs.getString("colon") + " " //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
         + formatter.format(this.myKiloBytesDeleted/1024));
      this.myHelper.getLog().logLine(EzLogger.DIVIDER);
   }

   /**
    * Compares each working file against the working files that follow it,
    * deleting the later duplicates, then logs the counters.
    */
   void dedupWorking()
   {
      int workingCount = 0;
      if (this.myWorkingFiles != null && this.myWorkingLengths != null)
      {
         workingCount = this.myWorkingFiles.length;
      }

      // Process the WORKING list
      this.myHelper.setValue(0);
      this.myHelper.setMaximum(workingCount - 1);
      for (int i = 0; i < workingCount; i++)
      {
         if (this.myKiller.die())
         {
            break;
         }
         this.myHelper.setValue(i);

         // File could be null because of dedup process
         // That is, nulling the file out is how to indicate the file has been
         // fully processed.  Also, files of zero length are disregarded.
         if (this.myWorkingFiles[i] != null && this.myWorkingLengths[i] > 0)
         {
            // Start just after the file you are checking because
            // you do not need to rescan the front of the working list.
            dedup(this.myWorkingFiles[i], this.myWorkingLengths[i], i + 1);
         }
      }
      this.myHelper.getLog().logLine(EzLogger.DIVIDER);
      DecimalFormat formatter = new DecimalFormat();
      this.myHelper.getLog().logLine(Jibs.getString("Deduper.11") + Jibs.getString("colon") + " " + formatter.format(this.mySampleHelped) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
         + " " + Jibs.getString("Deduper.12") + Jibs.getString("colon") + " " + formatter.format(this.myDuplicateNot) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
         + " " + Jibs.getString("Deduper.13") + Jibs.getString("colon") + " " + formatter.format(this.myDuplicate)); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
      this.myHelper.getLog().logLine(
         Jibs.getString("Deduper.14") + Jibs.getString("colon") + " " //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
         + formatter.format(this.myKiloBytesDeleted/1024));
      this.myHelper.getLog().logLine(EzLogger.DIVIDER);
   }

   //===========================================================================
   //                              Private Functions
   //===========================================================================

   /**
    * Deletes every working file, from index dedupStart onward, whose content
    * is identical to the keeper.
    * Only looks at the WORKING list not the stable list.
    * <p>
    * A candidate that cannot be read is never treated as a duplicate, and a
    * working entry that is the same path as the keeper is skipped so a file
    * is never deleted for matching itself.
    *
    * @param keeper file to keep; ignored when null
    * @param keeperLength length of the keeper in bytes; ignored when below 1
    * @param dedupStart first index of the working list to compare against
    */
   private void dedup(File keeper, long keeperLength, int dedupStart)
   {
      if (keeper == null || keeperLength < 1 )
      {
         return;
      }
      if (this.myWorkingFiles == null || this.myWorkingLengths == null)
      {
         return;
      }

      // Before doing any file reads at all,
      // let's just see if there are any files of the same size to
      // compare against.
      int sameSizeCount = 0;
      for (int i = dedupStart; i < this.myWorkingLengths.length; i++)
      {
         if (keeperLength == this.myWorkingLengths[i])
         {
            sameSizeCount++;
         }
      }

      // See if we have anything to do.
      if (sameSizeCount == 0)
      {
         return;
      }

      // See if we have a new max same size.
      if (sameSizeCount > this.myMaxSameSize)
      {
         this.myMaxSameSize = sameSizeCount;
      }

      // Read the keeper's sample once; it is reused for every candidate.
      byte[] keepSample = getSample(keeper, keeperLength);
      if (keepSample == null)
      {
         // Keeper is unreadable (already logged): nothing can match it.
         return;
      }

      // Now loop through the rest, first checking a sample, then
      // checking the entire file if the sample works out.
      for (int kill = dedupStart; kill < this.myWorkingLengths.length; kill++)
      {
         if (this.myKiller.die() )
         {
            return;
         }

         if (keeperLength != this.myWorkingLengths[kill])
         {
            continue;
         }

         File candidate = this.myWorkingFiles[kill];
         if (candidate == null || keeper.equals(candidate))
         {
            // Same path as the keeper: deleting it would destroy the
            // only copy.
            continue;
         }

         byte[] killSample = getSample(candidate, keeperLength);
         if (killSample == null)
         {
            // Unreadable candidate (already logged): leave it alone.
            continue;
         }

         if (!Arrays.equals(keepSample, killSample))
         {
            this.mySampleHelped++;
            continue;
         }

         if (sameContent(keeper, candidate))
         {
            this.myKiloBytesDeleted =
               this.myKiloBytesDeleted +
               (candidate.length()/1024);
            this.myHelper.getLog().logLine(
               Jibs.getString("Deduper.15") + Jibs.getString("colon") + " " //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
               + candidate.getAbsolutePath() + ""); //$NON-NLS-1$
            // NOTE(review): the result of delete() is not checked, so a
            // failed deletion is still counted as removed.
            candidate.delete();
            this.myWorkingFiles[kill] = null;
            this.myWorkingLengths[kill] = 0;
            this.myDuplicate++;
         }
         else
         {
            // Also reached when the full comparison failed with a read
            // error; the file is then kept.
            this.myDuplicateNot++;
         }
      }
   }

   /**
    * Reads the leading bytes of a file for a cheap first comparison.
    *
    * @param file file to sample
    * @param length expected file length in bytes; the sample is limited to
    *               the smaller of this and {@link #SAMPLE_SIZE}
    * @return the bytes actually read (shorter than requested if the file
    *         ended early), or null if the file could not be read
    */
   private byte[] getSample(File file, long length)
   {
      // Limit sample to SAMPLE_SIZE, if available.
      int wanted = SAMPLE_SIZE;
      if (length < SAMPLE_SIZE)
      {
         wanted = (int)length;
      }

      byte[] buffer = new byte[wanted];
      InputStream in = null;
      try
      {
         in = new FileInputStream(file);
         int got = readFully(in, buffer);
         if (got == buffer.length)
         {
            return buffer;
         }
         // File is shorter than expected: return only what is really there
         // so padding zeros are never compared.
         byte[] shorter = new byte[got];
         System.arraycopy(buffer, 0, shorter, 0, got);
         return shorter;
      }
      catch (Exception e)
      {
         // Broad catch kept on purpose: any failure (I/O or security)
         // is logged and the file is treated as unreadable.
         this.myHelper.getLog().logLine(
            Jibs.getString("Deduper.17") + Jibs.getString("colon") + " " + file.getAbsolutePath() + ""); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
         return null;
      }
      finally
      {
         closeQuietly(in);
      }
   }

   /**
    * Streams both files and compares them byte for byte, without loading
    * either file fully into memory.
    *
    * @param keeper first file
    * @param candidate second file
    * @return true only if both files were read completely and have identical
    *         content; false on any difference or read failure
    */
   private boolean sameContent(File keeper, File candidate)
   {
      InputStream keepIn = null;
      InputStream killIn = null;
      try
      {
         keepIn = new FileInputStream(keeper);
         killIn = new FileInputStream(candidate);
         byte[] keepBuffer = new byte[COMPARE_BUFFER_SIZE];
         byte[] killBuffer = new byte[COMPARE_BUFFER_SIZE];
         while (true)
         {
            int keepRead = readFully(keepIn, keepBuffer);
            int killRead = readFully(killIn, killBuffer);
            if (keepRead != killRead)
            {
               // One file ended before the other.
               return false;
            }
            for (int i = 0; i < keepRead; i++)
            {
               if (keepBuffer[i] != killBuffer[i])
               {
                  return false;
               }
            }
            if (keepRead < keepBuffer.length)
            {
               // A partially filled buffer means both streams hit end of
               // file at the same offset with no difference found.
               return true;
            }
         }
      }
      catch (Exception e)
      {
         // Broad catch kept on purpose: a file that cannot be verified is
         // reported and never considered a duplicate.
         this.myHelper.getLog().logLine(
            Jibs.getString("Deduper.19") + Jibs.getString("colon") + " " //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
            + keeper.getAbsolutePath() + " | " + candidate.getAbsolutePath()); //$NON-NLS-1$
         return false;
      }
      finally
      {
         closeQuietly(keepIn);
         closeQuietly(killIn);
      }
   }

   /**
    * Reads until the buffer is full or the stream ends; a single read call
    * is allowed to return fewer bytes than requested.
    *
    * @param in stream to read from
    * @param buffer destination, filled from index 0
    * @return number of bytes stored in the buffer
    * @throws IOException if the underlying read fails
    */
   private static int readFully(InputStream in, byte[] buffer) throws IOException
   {
      int total = 0;
      while (total < buffer.length)
      {
         int read = in.read(buffer, total, buffer.length - total);
         if (read < 0)
         {
            break;
         }
         total += read;
      }
      return total;
   }

   /**
    * Closes a stream, ignoring a null argument and any failure on close.
    *
    * @param in stream to close; may be null
    */
   private static void closeQuietly(InputStream in)
   {
      if (in == null)
      {
         return;
      }
      try
      {
         in.close();
      }
      catch (IOException ignored)
      {
         // The stream was only read from; a failed close loses nothing.
      }
   }
}

