/*
 * Copyright (c) 2010 Gerhard Beck. All rights reserved.
 * 
 * Subject to the GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007
 * http://www.gnu.org/licenses/gpl.html
 * 
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GERHARD
 * BECK OR OTHER CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.gerhardb.jibs.textPad.ranker;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;

import javax.swing.*;

import org.gerhardb.lib.io.FileUtil;

/**
 * Walks a directory tree and, for every file whose extension is listed in
 * {@link #ENDINGS}, counts how often each of a given list of words occurs.
 * The results are written to a text file with this layout:
 * <ul>
 * <li>line 1: {@link #TREE_BASE} followed by the absolute path of the scanned
 * directory;</li>
 * <li>line 2: the literal "File", {@link #FILE_TOKEN}, then the words
 * separated by commas;</li>
 * <li>one line per scanned file: its absolute path, {@link #FILE_TOKEN},
 * then the per-word counts separated by commas, in the same order as the
 * words on line 2.</li>
 * </ul>
 * Progress is reported through the {@link JProgressBar} given to the
 * constructor.
 */
class WordCount
{
	/** Lower-case file extensions of the files that get scanned. */
	private static final String[] ENDINGS = { "txt", "htm", "html", "nat" };

	/** Prefix of the first output line, which names the scanned directory. */
	public static final String TREE_BASE = "Tree Base: ";

	// Used because Linux allows commas in file names!!
	// Hopefully this will be unique and never in a file name.
	// Comma added to still be somewhat compatible with Excel.
	static final String FILE_TOKEN = "=!:,";

	/** Progress bar updated while scanning; supplied by the creator. */
	JProgressBar myProgress;

	/**
	 * When true, the scan returns at its next check point. Reset to false at
	 * the start of every {@link #count(String[], File, File)} call.
	 * NOTE(review): nothing in this class sets it to true; presumably another
	 * class (possibly on another thread) does - confirm whether it needs to be
	 * volatile.
	 */
	boolean iStop;

	/**
	 * @param progress progress bar that receives range, value and status
	 *           string updates during a scan
	 */
	WordCount(JProgressBar progress)
	{
		this.myProgress = progress;
	}

	/**
	 * Scans {@code dirToScan} recursively and writes the word counts to
	 * {@code outputFile}.
	 * 
	 * @param words words to count; tokens from the files are lower-cased
	 *           before being compared with these using {@code equals}
	 * @param dirToScan root directory of the scan
	 * @param outputFile file the report is written to
	 * @throws Exception if {@code dirToScan} does not exist or is not a
	 *            directory, or if anything fails while scanning or writing
	 *            (the original failure is attached as the cause)
	 */
	void count(String[] words, File dirToScan, File outputFile) throws Exception
	{
		this.iStop = false;
		this.myProgress.setString("");
		this.myProgress.setValue(0);

		if (!dirToScan.exists()) { throw new Exception("Could not find: "
				+ dirToScan); }
		if (!dirToScan.isDirectory()) { throw new Exception(
				"Root file must be a directory"); }

		System.out.println("RootString: " + dirToScan);
		System.out.println("outputFile: " + outputFile);

		if (this.iStop) { return; }
		PrintWriter out = null;
		try
		{
			out = new PrintWriter(new BufferedWriter(new FileWriter(outputFile)));

			// First line is the directory we are going to scan.
			out.print(TREE_BASE);
			out.println(dirToScan.getAbsolutePath());

			// Second Line is the list of words to count.
			out.print("File");
			out.print(FILE_TOKEN);
			for (int wordIndex = 0; wordIndex < words.length; wordIndex++)
			{
				if (wordIndex > 0)
				{
					out.print(',');
				}
				out.print(words[wordIndex]);
			}
			out.println("");

			doDirectory(out, dirToScan, words);

			// PrintWriter never throws on a failed write; it only records the
			// failure. Ask explicitly so a truncated report is not shown as
			// "Done".
			if (out.checkError()) { throw new Exception(
					"Write error reported for: " + outputFile); }
		}
		catch (Exception ex)
		{
			ex.printStackTrace();
			// Keep the original failure as the cause so callers can see it.
			throw new Exception("Problem writing to: " + outputFile, ex);
		}
		finally
		{
			// out is still null when opening the output file failed.
			if (out != null)
			{
				out.close();
			}
		}
		System.out.println("Completed outputFile: " + outputFile);

		this.myProgress.setValue(this.myProgress.getMaximum());
		this.myProgress.setString("Done");
	}

	/**
	 * Processes every entry of {@code dirToScan}: subdirectories are scanned
	 * recursively, regular files with an extension from {@link #ENDINGS} are
	 * passed to {@link #doFile(PrintWriter, File, String[])}. Returns early
	 * once {@link #iStop} is set.
	 * 
	 * @param out writer receiving one line per scanned file
	 * @param dirToScan directory whose entries are processed
	 * @param words words to count
	 * @throws Exception if scanning a file fails
	 */
	void doDirectory(PrintWriter out, File dirToScan, String[] words)
			throws Exception
	{
		File[] files = dirToScan.listFiles();
		if (files == null)
		{
			// listFiles() returns null when the directory cannot be read.
			// Skip it rather than aborting the whole scan.
			System.out.println("Could not list: " + dirToScan);
			return;
		}
		this.myProgress.setMaximum(files.length);
		for (int i = 0; i < files.length; i++)
		{
			this.myProgress.setValue(i + 1);
			if (files[i].isDirectory())
			{
				if (this.iStop) { return; }
				doDirectory(out, files[i], words);
				// The recursive call re-ranged the shared progress bar for the
				// subdirectory; restore this directory's range and position.
				this.myProgress.setMaximum(files.length);
				this.myProgress.setValue(i + 1);
			}
			else if (files[i].isFile())
			{
				if (this.iStop) { return; }
				String ending = FileUtil.getExtension(files[i].getName());
				if (ending != null)
				{
					ending = ending.toLowerCase();
					for (int j = 0; j < ENDINGS.length; j++)
					{
						if (ENDINGS[j].equals(ending))
						{
							doFile(out, files[i], words);
							break;
						}
					}
				}
			}
		}
	}

	/**
	 * Counts the occurrences of each word in {@code aFile} and writes one
	 * result line: absolute path, {@link #FILE_TOKEN}, comma separated counts.
	 * If {@link #iStop} is set while counting, returns without writing a line
	 * for this file.
	 * 
	 * @param out writer receiving the result line
	 * @param aFile file to read
	 * @param words words to count; compared against lower-cased tokens
	 * @throws Exception if reading the file fails
	 */
	void doFile(PrintWriter out, File aFile, String[] words) throws Exception
	{
		int[] counts = new int[words.length];
		this.myProgress.setString(aFile.getAbsolutePath());
		String text = FileUtil.getStringFromFile(aFile);

		// Split on runs of non-word characters, leaving the word tokens.
		String[] result = text.split("\\W+");
		for (int i = 0; i < result.length; i++)
		{
			// Lower-case each token once instead of once per word compared.
			String token = result[i].toLowerCase();
			for (int wordIndex = 0; wordIndex < words.length; wordIndex++)
			{
				if (this.iStop) { return; }
				if (token.equals(words[wordIndex]))
				{
					counts[wordIndex]++;
				}
			}
		}
		out.print(aFile.getAbsolutePath());
		out.print(FILE_TOKEN);
		for (int wordIndex = 0; wordIndex < words.length; wordIndex++)
		{
			if (wordIndex > 0)
			{
				out.print(',');
			}
			out.print(counts[wordIndex]);
		}
		out.println("");
	}

}
