001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2011 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 021 /** 022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 023 */ 024 package org.sonar.duplications.cpd; 025 026 import net.sourceforge.pmd.cpd.*; 027 import net.sourceforge.pmd.util.FileFinder; 028 029 import java.io.File; 030 import java.io.FileNotFoundException; 031 import java.io.IOException; 032 import java.util.*; 033 034 public class CPD { 035 036 private Map<String, SourceCode> source = new HashMap<String, SourceCode>(); 037 private CPDListener listener = new CPDNullListener(); 038 private Tokens tokens = new Tokens(); 039 private int minimumTileSize; 040 private MatchAlgorithm matchAlgorithm; 041 private Language language; 042 private boolean skipDuplicates; 043 public static boolean debugEnable = false; 044 private boolean loadSourceCodeSlices = true; 045 private String encoding = System.getProperty("file.encoding"); 046 047 public CPD(int minimumTileSize, Language language) { 048 TokenEntry.clearImages(); // workaround for bug 1947823 049 this.minimumTileSize = minimumTileSize; 050 this.language = language; 051 } 052 053 public void skipDuplicates() { 054 this.skipDuplicates = true; 055 } 056 057 public void setCpdListener(CPDListener cpdListener) { 058 this.listener = cpdListener; 059 } 060 061 public void setEncoding(String encoding) { 062 this.encoding = encoding; 063 } 064 065 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { 066 this.loadSourceCodeSlices = loadSourceCodeSlices; 067 } 068 069 public void go() { 070 TokenEntry.clearImages(); 071 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener); 072 matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices); 073 matchAlgorithm.findMatches(); 074 } 075 076 public Iterator<Match> getMatches() { 077 return matchAlgorithm.matches(); 078 } 079 080 public void add(File file) throws IOException { 081 add(1, file); 082 } 083 084 public void addAllInDirectory(String dir) throws IOException { 085 addDirectory(dir, false); 086 } 087 088 public void addRecursively(String dir) throws IOException { 089 addDirectory(dir, true); 090 } 091 092 public void add(List<File> files) throws IOException { 093 for (File f : files) { 094 add(files.size(), f); 095 } 096 } 097 098 private void addDirectory(String dir, boolean recurse) throws IOException { 099 if ( !(new File(dir)).exists()) { 100 throw new FileNotFoundException("Couldn't find directory " + dir); 101 } 102 FileFinder finder = new FileFinder(); 103 // TODO - could use SourceFileSelector here 104 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse)); 105 } 106 107 private Set<String> current = new HashSet<String>(); 108 109 private void add(int fileCount, File file) throws IOException { 110 111 if (skipDuplicates) { 112 // TODO refactor this thing into a separate class 113 String signature = file.getName() + '_' + file.length(); 114 if (current.contains(signature)) { 115 System.err.println("Skipping " + file.getAbsolutePath() 116 + " since it appears to be a duplicate file and --skip-duplicate-files is set"); 117 return; 118 } 119 current.add(signature); 120 } 121 122 if ( !file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) { 123 System.err.println("Skipping " + file + " since it appears to be a symlink"); 124 return; 125 } 126 127 listener.addedFile(fileCount, file); 128 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding)); 129 language.getTokenizer().tokenize(sourceCode, tokens); 130 source.put(sourceCode.getFileName(), sourceCode); 131 } 132 133 134 }