001 /**
002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003 */
004 package org.sonar.duplications.cpd;
005
006 import net.sourceforge.pmd.cpd.*;
007 import net.sourceforge.pmd.util.FileFinder;
008
009 import java.io.File;
010 import java.io.FileNotFoundException;
011 import java.io.IOException;
012 import java.util.*;
013
014 public class CPD {
015
016 private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
017 private CPDListener listener = new CPDNullListener();
018 private Tokens tokens = new Tokens();
019 private int minimumTileSize;
020 private MatchAlgorithm matchAlgorithm;
021 private Language language;
022 private boolean skipDuplicates;
023 public static boolean debugEnable = false;
024 private boolean loadSourceCodeSlices = true;
025 private String encoding = System.getProperty("file.encoding");
026
027 public CPD(int minimumTileSize, Language language) {
028 TokenEntry.clearImages(); // workaround for bug 1947823
029 this.minimumTileSize = minimumTileSize;
030 this.language = language;
031 }
032
033 public void skipDuplicates() {
034 this.skipDuplicates = true;
035 }
036
037 public void setCpdListener(CPDListener cpdListener) {
038 this.listener = cpdListener;
039 }
040
041 public void setEncoding(String encoding) {
042 this.encoding = encoding;
043 }
044
045 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
046 this.loadSourceCodeSlices = loadSourceCodeSlices;
047 }
048
049 public void go() {
050 TokenEntry.clearImages();
051 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
052 matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices);
053 matchAlgorithm.findMatches();
054 }
055
056 public Iterator<Match> getMatches() {
057 return matchAlgorithm.matches();
058 }
059
060 public void add(File file) throws IOException {
061 add(1, file);
062 }
063
064 public void addAllInDirectory(String dir) throws IOException {
065 addDirectory(dir, false);
066 }
067
068 public void addRecursively(String dir) throws IOException {
069 addDirectory(dir, true);
070 }
071
072 public void add(List<File> files) throws IOException {
073 for (File f : files) {
074 add(files.size(), f);
075 }
076 }
077
078 private void addDirectory(String dir, boolean recurse) throws IOException {
079 if ( !(new File(dir)).exists()) {
080 throw new FileNotFoundException("Couldn't find directory " + dir);
081 }
082 FileFinder finder = new FileFinder();
083 // TODO - could use SourceFileSelector here
084 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
085 }
086
087 private Set<String> current = new HashSet<String>();
088
089 private void add(int fileCount, File file) throws IOException {
090
091 if (skipDuplicates) {
092 // TODO refactor this thing into a separate class
093 String signature = file.getName() + '_' + file.length();
094 if (current.contains(signature)) {
095 System.err.println("Skipping " + file.getAbsolutePath()
096 + " since it appears to be a duplicate file and --skip-duplicate-files is set");
097 return;
098 }
099 current.add(signature);
100 }
101
102 if ( !file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
103 System.err.println("Skipping " + file + " since it appears to be a symlink");
104 return;
105 }
106
107 listener.addedFile(fileCount, file);
108 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
109 language.getTokenizer().tokenize(sourceCode, tokens);
110 source.put(sourceCode.getFileName(), sourceCode);
111 }
112
113
114 }