package net.das.jarsearch;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.JarInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import gnu.regexp.RE;
import gnu.regexp.REException;


/**
 * Searches a set of files, directories and archives for path names that match a
 * regular expression.  Directories may optionally be walked recursively, and archives
 * found inside other archives may optionally be searched as well.
 *
 * @author <a href="mailto:doug@dseifert.net">Doug Seifert</a>
 */
public class JarSearch {
    /** File name suffixes (lower case, dot included) that mark a file as an archive. */
    private static final String[] ARCHIVE_SUFFIXES = {".jar", ".ear", ".zip", ".rar", ".war"};

    /** Separates an archive name from the name of an entry inside it in a result string. */
    private static final String ENTRY_SEPARATOR = ">";

    private final Set mInitialFiles;
    private final RE mSearchRE;
    private boolean mRecurseDirectories = false;
    private boolean mRecurseArchives = false;


    /**
     * Create a search object that will search a set of directories, files and archives
     * for files whose path names match the provided regular expression.  The set
     * may contain files, in which case the search is made against the file itself.  The
     * set may also contain directories, in which case the directory and optionally all
     * its contents are searched.  Finally, the set may contain jar archives, whose
     * entries are searched.  Optionally, archives within archives (archive
     * recursion) may be searched.
     *
     * @param aSearchTerm A string search expression that is turned into a
     *         gnu.regexp.RE object by invoking the RE(Object) constructor.
     * @param lDirectories The set of java.io.File objects (files, directories and
     *         archives) to search.  A null set is treated as an empty set.
     * @throws REException If the search term can't be turned into a valid RE object.
     */
    public JarSearch(String aSearchTerm, Set lDirectories) throws REException {
        mSearchRE = new RE(aSearchTerm);
        mInitialFiles = lDirectories;
    }

    /**
     * Create a search object that will search a set of directories, files and archives
     * for files whose path names match the provided regular expression.  The set
     * may contain files, in which case the search is made against the file itself.  The
     * set may also contain directories, in which case the directory and optionally all
     * its contents are searched.  Finally, the set may contain jar archives, whose
     * entries are searched.  Optionally, archives within archives (archive
     * recursion) may be searched.
     *
     * @param aSearchRE A gnu.regexp.RE used to match file path names against.
     * @param lDirectories The set of java.io.File objects (files, directories and
     *         archives) to search.  A null set is treated as an empty set.
     */
    public JarSearch(RE aSearchRE, Set lDirectories) {
        mSearchRE = aSearchRE;
        mInitialFiles = lDirectories;
    }

    /**
     * The main program for the JarSearch class.  This program may be invoked as follows:
     * <code>java net.das.jarsearch.JarSearch [-r] [-a] [--] {regexpstring} [file ...]</code>
     * <ul>
     * <li>If the -r flag is provided, any directories provided as arguments will be recursed.</li>
     * <li>If the -a flag is provided, any archives provided as arguments, or any archives found
     * as a result of directory recursion, will be recursed</li>
     * <li>A -- argument ends option parsing, so that a regexpstring starting with '-'
     * can be given.</li>
     * </ul>
     *
     * <p>
     * Archive recursion means that archives nested within archives to any level will be searched.
     * If no file arguments are given, the current working directory (".") is used.
     * </p>
     * <p>
     * This method will call System.exit(0) if the search was performed without an error.  If an
     * error occurs (for example, because the regexpstring can't be parsed into a valid
     * gnu.regexp.RE object), System.exit(1) will be called.  System.exit(2)
     * will be called in the event of a usage error (no regexpstring, or an unknown option).
     * </p>
     * <p>
     * Example:<br>
     * java net.das.jarsearch.JarSearch -a Bean foo.ear<br>
     * Will list all files whose names contain the substring 'Bean' in the given ear archive.
     * Module archives that might be contained in the ear will also be searched.
     * </p>
     *
     * @param args The command line arguments
     */
    public static void main(String[] args) {
        int lArgIndex = 0;
        boolean lRecurseDirs = false;
        boolean lRecurseArchives = false;

        // Consume leading options.  The bounds check matters: a command line consisting
        // only of options must end in a usage error, not an ArrayIndexOutOfBoundsException.
        while (lArgIndex < args.length && args[lArgIndex].startsWith("-")) {
            String lOption = args[lArgIndex++];
            if ("--".equals(lOption)) {
                break;
            } else if ("-a".equals(lOption)) {
                lRecurseArchives = true;
            } else if ("-r".equals(lOption)) {
                lRecurseDirs = true;
            } else {
                // Silently skipping an unknown option would shift the regexp onto
                // whatever argument happens to come next.
                System.err.println("Unknown option: " + lOption);
                printUsage();
                System.exit(2);
            }
        }

        if (lArgIndex >= args.length) {
            printUsage();
            System.exit(2);
        }

        String lSearchTerm = args[lArgIndex++];

        Set lDirectories = new TreeSet();
        for (int i = lArgIndex; i < args.length; ++i) {
            lDirectories.add(new File(args[i]));
        }
        if (lDirectories.isEmpty()) {
            // Documented default: search the current working directory.
            lDirectories.add(new File("."));
        }

        try {
            JarSearch lSearch = new JarSearch(lSearchTerm, lDirectories);
            lSearch.setRecurseArchives(lRecurseArchives);
            lSearch.setRecurseDirectories(lRecurseDirs);

            System.out.println("Searching for '" + lSearchTerm + "' ...");
            List lResults = lSearch.execute();

            System.out.println("There are " + lResults.size() + " matches:");
            Iterator lResultIter = lResults.iterator();
            while (lResultIter.hasNext()) {
                System.out.println((String) lResultIter.next());
            }
        } catch (REException ree) {
            System.err.println("Invalid regular expression '" + lSearchTerm + "': " + ree.getMessage());
            System.exit(1);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Print the command line usage message to System.err.
     */
    private static void printUsage() {
        // Class.getName() rather than the Class object itself: concatenating the Class
        // object would print "java class net.das..." instead of a runnable command.
        System.err.println("Usage: java " + JarSearch.class.getName()
            + " [-r] [-a] [--] <regexpstring> [file ...]");
        System.err.println("    -r recurses into directories, -a recurses into archives"
            + " nested within archives.");
        System.err.println("    <regexpstring> is a string that can be parsed into a "
            + "valid gnu.regexp.RE object.");
        System.err.println("    zero or more directories may be specified. If none are specified,"
            + " the current working directory is searched.");
    }

    /**
     * Flag the search to recurse directories.
     *
     * @param aFlag Turn on or off directory recursion.
     */
    public void setRecurseDirectories(boolean aFlag) {
        mRecurseDirectories = aFlag;
    }

    /**
     * Flag the search to recurse archives.  If this is true,
     * archives within archives will be searched.
     *
     * @param aFlag Turn on or off archive recursion.
     */
    public void setRecurseArchives(boolean aFlag) {
        mRecurseArchives = aFlag;
    }

    /**
     * Perform the search and return a List of search results.  The result is a list
     * of String objects of the form:
     * <pre>
     * path/that/matched
     * path/of/archive/that/matched.jar
     * archive.jar>that/has/a/matching/file
     * an/archive.jar>within/an/archive.jar>that/has/a/match
     * ...
     * </pre>
     * The path of every candidate file is matched, including the path of an archive
     * itself; archives additionally have their entries matched.  An archive that cannot
     * be read is reported on System.err and the search continues with the next file.
     *
     * @return A list of matches of the search expression
     */
    public List execute() {
        List lMatches = new ArrayList();

        // The candidate set holds every initial file plus, when directory recursion
        // is on, all non-directory files below the initial directories.
        Iterator lFileIter = findAllFiles(mInitialFiles).iterator();
        while (lFileIter.hasNext()) {
            File lFile = (File) lFileIter.next();
            String lPath = lFile.getPath();

            if (matches(lPath)) {
                lMatches.add(lPath);
            }

            // A directory that merely has an archive-like name must not be opened as a jar.
            if (!lFile.isDirectory() && isArchive(lFile.getName())) {
                try {
                    checkArchiveFile(lFile, lMatches);
                } catch (IOException ioe) {
                    reportArchiveError(lPath, ioe);
                }
            }
        }
        return lMatches;
    }

    /**
     * Match every entry of an archive on the file system and, when archive recursion
     * is on, descend into entries that are themselves archives.
     *
     * @param aFile The archive to open.
     * @param aMatches The list that matches are appended to.
     * @throws IOException If the archive cannot be opened or its entries cannot be read.
     */
    private void checkArchiveFile(File aFile, List aMatches) throws IOException {
        String lArchiveName = aFile.getPath();
        JarFile lJar = new JarFile(aFile);
        try {
            Enumeration lEntries = lJar.entries();
            while (lEntries.hasMoreElements()) {
                JarEntry lEntry = (JarEntry) lEntries.nextElement();
                String lEntryName = lEntry.getName();
                recordIfMatch(lArchiveName, lEntryName, aMatches);

                if (mRecurseArchives && isArchive(lEntryName)) {
                    String lNestedName = lArchiveName + ENTRY_SEPARATOR + lEntryName;
                    try {
                        ZipInputStream lStream = new ZipInputStream(lJar.getInputStream(lEntry));
                        try {
                            checkArchiveStream(lNestedName, lStream, aMatches);
                        } finally {
                            lStream.close();
                        }
                    } catch (IOException ioe) {
                        // One unreadable nested archive should not hide the
                        // remaining entries of the enclosing archive.
                        reportArchiveError(lNestedName, ioe);
                    }
                }
            }
        } finally {
            lJar.close();
        }
    }

    /**
     * Match every entry read from an archive stream and, when archive recursion is on,
     * descend into entries that are themselves archives.
     * <p>
     * A plain ZipInputStream is used instead of a JarInputStream because the
     * JarInputStream constructor consumes a leading manifest entry, which would then
     * never be offered for matching.
     * </p>
     *
     * @param aPrefix The display name of the archive being read.
     * @param aStream The stream positioned at the start of the archive data.  It is not
     *         closed by this method.
     * @param aMatches The list that matches are appended to.
     * @throws IOException If the stream cannot be read.
     */
    private void checkArchiveStream(String aPrefix, ZipInputStream aStream, List aMatches)
        throws IOException {
        ZipEntry lEntry;
        // getNextEntry() closes the previous entry first, skipping any unread data,
        // so entry contents never have to be drained by hand.
        while ((lEntry = aStream.getNextEntry()) != null) {
            String lEntryName = lEntry.getName();
            recordIfMatch(aPrefix, lEntryName, aMatches);

            if (mRecurseArchives && isArchive(lEntryName)) {
                String lNestedName = aPrefix + ENTRY_SEPARATOR + lEntryName;
                try {
                    // The nested stream is deliberately not closed: it reads from the
                    // current entry of the enclosing stream, which is still in use.
                    checkArchiveStream(lNestedName, new ZipInputStream(aStream), aMatches);
                } catch (IOException ioe) {
                    reportArchiveError(lNestedName, ioe);
                }
            }
        }
    }

    /**
     * Append "prefix&gt;entryName" to the match list if the entry name matches the
     * search expression.
     *
     * @param aPrefix The display name of the archive containing the entry.
     * @param aEntryName The name of the entry within the archive.
     * @param aMatches The list that matches are appended to.
     */
    private void recordIfMatch(String aPrefix, String aEntryName, List aMatches) {
        if (matches(aEntryName)) {
            aMatches.add(aPrefix + ENTRY_SEPARATOR + aEntryName);
        }
    }

    /**
     * Test a name against the search expression.
     *
     * @param aName The path or entry name to test.
     * @return true if the search expression finds a match in the name.
     */
    private boolean matches(String aName) {
        return mSearchRE.getMatch(aName) != null;
    }

    /**
     * Report an archive that could not be read.  Errors go to System.err so that they
     * do not mix with results printed on System.out.
     *
     * @param aArchiveName The display name of the archive that failed.
     * @param aError The error that occurred.
     */
    private static void reportArchiveError(String aArchiveName, IOException aError) {
        System.err.println("Error checking archive " + aArchiveName + ": " + aError);
    }

    /**
     * Decide whether a name denotes an archive, based on its suffix.  The comparison
     * ignores case character by character, so it does not depend on the default locale.
     *
     * @param aName A file name or archive entry name.
     * @return true if the name ends with one of the known archive suffixes.
     */
    private static boolean isArchive(String aName) {
        for (int i = 0; i < ARCHIVE_SUFFIXES.length; ++i) {
            String lSuffix = ARCHIVE_SUFFIXES[i];
            // A negative offset (name shorter than the suffix) makes regionMatches return false.
            int lOffset = aName.length() - lSuffix.length();
            if (aName.regionMatches(true, lOffset, lSuffix, 0, lSuffix.length())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Build the sorted set of candidate files: every initial file, plus all
     * non-directory files below each initial directory when directory recursion is on.
     *
     * @param aBaseDirs The initial set of java.io.File objects; may be null.
     * @return A new set of java.io.File objects to search; never null.
     */
    private Set findAllFiles(Set aBaseDirs) {
        Set lFiles = new TreeSet();
        if (aBaseDirs == null) {
            return lFiles;
        }
        Iterator lBaseIter = aBaseDirs.iterator();
        while (lBaseIter.hasNext()) {
            File lBase = (File) lBaseIter.next();
            lFiles.add(lBase);
            if (mRecurseDirectories && lBase.isDirectory()) {
                collectFiles(lBase, lFiles);
            }
        }
        return lFiles;
    }

    /**
     * Recursively add every non-directory file below a directory to a set.
     * Sub-directories themselves are walked but not added.
     *
     * @param aDir The directory to walk.
     * @param aFiles The set that files are added to.
     */
    private void collectFiles(File aDir, Set aFiles) {
        File[] lChildren = aDir.listFiles();
        if (lChildren == null) {
            // listFiles() returns null when the directory cannot be listed.
            return;
        }
        for (int i = 0; i < lChildren.length; ++i) {
            if (lChildren[i].isDirectory()) {
                collectFiles(lChildren[i], aFiles);
            } else {
                aFiles.add(lChildren[i]);
            }
        }
    }
}