001 /** 002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 003 */ 004 package org.sonar.duplications.cpd; 005 006 import net.sourceforge.pmd.cpd.*; 007 import net.sourceforge.pmd.util.FileFinder; 008 009 import java.io.File; 010 import java.io.FileNotFoundException; 011 import java.io.IOException; 012 import java.util.*; 013 014 public class CPD { 015 016 private Map<String, SourceCode> source = new HashMap<String, SourceCode>(); 017 private CPDListener listener = new CPDNullListener(); 018 private Tokens tokens = new Tokens(); 019 private int minimumTileSize; 020 private MatchAlgorithm matchAlgorithm; 021 private Language language; 022 private boolean skipDuplicates; 023 public static boolean debugEnable = false; 024 private boolean loadSourceCodeSlices = true; 025 private String encoding = System.getProperty("file.encoding"); 026 027 public CPD(int minimumTileSize, Language language) { 028 TokenEntry.clearImages(); // workaround for bug 1947823 029 this.minimumTileSize = minimumTileSize; 030 this.language = language; 031 } 032 033 public void skipDuplicates() { 034 this.skipDuplicates = true; 035 } 036 037 public void setCpdListener(CPDListener cpdListener) { 038 this.listener = cpdListener; 039 } 040 041 public void setEncoding(String encoding) { 042 this.encoding = encoding; 043 } 044 045 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { 046 this.loadSourceCodeSlices = loadSourceCodeSlices; 047 } 048 049 public void go() { 050 TokenEntry.clearImages(); 051 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener); 052 matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices); 053 matchAlgorithm.findMatches(); 054 } 055 056 public Iterator<Match> getMatches() { 057 return matchAlgorithm.matches(); 058 } 059 060 public void add(File file) throws IOException { 061 add(1, file); 062 } 063 064 public void addAllInDirectory(String dir) throws IOException { 065 addDirectory(dir, false); 066 } 067 068 public void addRecursively(String dir) throws IOException { 069 addDirectory(dir, true); 070 } 071 072 public void add(List<File> files) throws IOException { 073 for (File f : files) { 074 add(files.size(), f); 075 } 076 } 077 078 private void addDirectory(String dir, boolean recurse) throws IOException { 079 if ( !(new File(dir)).exists()) { 080 throw new FileNotFoundException("Couldn't find directory " + dir); 081 } 082 FileFinder finder = new FileFinder(); 083 // TODO - could use SourceFileSelector here 084 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse)); 085 } 086 087 private Set<String> current = new HashSet<String>(); 088 089 private void add(int fileCount, File file) throws IOException { 090 091 if (skipDuplicates) { 092 // TODO refactor this thing into a separate class 093 String signature = file.getName() + '_' + file.length(); 094 if (current.contains(signature)) { 095 System.err.println("Skipping " + file.getAbsolutePath() 096 + " since it appears to be a duplicate file and --skip-duplicate-files is set"); 097 return; 098 } 099 current.add(signature); 100 } 101 102 if ( !file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) { 103 System.err.println("Skipping " + file + " since it appears to be a symlink"); 104 return; 105 } 106 107 listener.addedFile(fileCount, file); 108 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding)); 109 language.getTokenizer().tokenize(sourceCode, tokens); 110 source.put(sourceCode.getFileName(), sourceCode); 111 } 112 113 114 }