001    /**
002     * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003     */
004    package org.sonar.duplications.cpd;
005    
006    import net.sourceforge.pmd.cpd.*;
007    import net.sourceforge.pmd.util.FileFinder;
008    
009    import java.io.File;
010    import java.io.FileNotFoundException;
011    import java.io.IOException;
012    import java.util.*;
013    
014    public class CPD {
015    
016      private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
017      private CPDListener listener = new CPDNullListener();
018      private Tokens tokens = new Tokens();
019      private int minimumTileSize;
020      private MatchAlgorithm matchAlgorithm;
021      private Language language;
022      private boolean skipDuplicates;
023      public static boolean debugEnable = false;
024      private boolean loadSourceCodeSlices = true;
025      private String encoding = System.getProperty("file.encoding");
026    
027      public CPD(int minimumTileSize, Language language) {
028        TokenEntry.clearImages(); // workaround for bug 1947823
029        this.minimumTileSize = minimumTileSize;
030        this.language = language;
031      }
032    
033      public void skipDuplicates() {
034        this.skipDuplicates = true;
035      }
036    
037      public void setCpdListener(CPDListener cpdListener) {
038        this.listener = cpdListener;
039      }
040    
041      public void setEncoding(String encoding) {
042        this.encoding = encoding;
043      }
044    
045      public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
046        this.loadSourceCodeSlices = loadSourceCodeSlices;
047      }
048    
049      public void go() {
050        TokenEntry.clearImages();
051        matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
052        matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices);
053        matchAlgorithm.findMatches();
054      }
055    
056      public Iterator<Match> getMatches() {
057        return matchAlgorithm.matches();
058      }
059    
060      public void add(File file) throws IOException {
061        add(1, file);
062      }
063    
064      public void addAllInDirectory(String dir) throws IOException {
065        addDirectory(dir, false);
066      }
067    
068      public void addRecursively(String dir) throws IOException {
069        addDirectory(dir, true);
070      }
071    
072      public void add(List<File> files) throws IOException {
073        for (File f : files) {
074          add(files.size(), f);
075        }
076      }
077    
078      private void addDirectory(String dir, boolean recurse) throws IOException {
079        if ( !(new File(dir)).exists()) {
080          throw new FileNotFoundException("Couldn't find directory " + dir);
081        }
082        FileFinder finder = new FileFinder();
083        // TODO - could use SourceFileSelector here
084        add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
085      }
086    
087      private Set<String> current = new HashSet<String>();
088    
089      private void add(int fileCount, File file) throws IOException {
090    
091        if (skipDuplicates) {
092          // TODO refactor this thing into a separate class
093          String signature = file.getName() + '_' + file.length();
094          if (current.contains(signature)) {
095            System.err.println("Skipping " + file.getAbsolutePath()
096                + " since it appears to be a duplicate file and --skip-duplicate-files is set");
097            return;
098          }
099          current.add(signature);
100        }
101    
102        if ( !file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
103          System.err.println("Skipping " + file + " since it appears to be a symlink");
104          return;
105        }
106    
107        listener.addedFile(fileCount, file);
108        SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
109        language.getTokenizer().tokenize(sourceCode, tokens);
110        source.put(sourceCode.getFileName(), sourceCode);
111      }
112    
113    
114    }