001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2011 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020
021 /**
022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023 */
024 package org.sonar.duplications.cpd;
025
026 import net.sourceforge.pmd.cpd.*;
027 import net.sourceforge.pmd.util.FileFinder;
028
029 import java.io.File;
030 import java.io.FileNotFoundException;
031 import java.io.IOException;
032 import java.util.*;
033
034 public class CPD {
035
036 private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
037 private CPDListener listener = new CPDNullListener();
038 private Tokens tokens = new Tokens();
039 private int minimumTileSize;
040 private MatchAlgorithm matchAlgorithm;
041 private Language language;
042 private boolean skipDuplicates;
043 public static boolean debugEnable = false;
044 private boolean loadSourceCodeSlices = true;
045 private String encoding = System.getProperty("file.encoding");
046
047 public CPD(int minimumTileSize, Language language) {
048 TokenEntry.clearImages(); // workaround for bug 1947823
049 this.minimumTileSize = minimumTileSize;
050 this.language = language;
051 }
052
053 public void skipDuplicates() {
054 this.skipDuplicates = true;
055 }
056
057 public void setCpdListener(CPDListener cpdListener) {
058 this.listener = cpdListener;
059 }
060
061 public void setEncoding(String encoding) {
062 this.encoding = encoding;
063 }
064
065 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
066 this.loadSourceCodeSlices = loadSourceCodeSlices;
067 }
068
069 public void go() {
070 TokenEntry.clearImages();
071 matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
072 matchAlgorithm.setLoadSourceCodeSlices(loadSourceCodeSlices);
073 matchAlgorithm.findMatches();
074 }
075
076 public Iterator<Match> getMatches() {
077 return matchAlgorithm.matches();
078 }
079
080 public void add(File file) throws IOException {
081 add(1, file);
082 }
083
084 public void addAllInDirectory(String dir) throws IOException {
085 addDirectory(dir, false);
086 }
087
088 public void addRecursively(String dir) throws IOException {
089 addDirectory(dir, true);
090 }
091
092 public void add(List<File> files) throws IOException {
093 for (File f : files) {
094 add(files.size(), f);
095 }
096 }
097
098 private void addDirectory(String dir, boolean recurse) throws IOException {
099 if ( !(new File(dir)).exists()) {
100 throw new FileNotFoundException("Couldn't find directory " + dir);
101 }
102 FileFinder finder = new FileFinder();
103 // TODO - could use SourceFileSelector here
104 add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
105 }
106
107 private Set<String> current = new HashSet<String>();
108
109 private void add(int fileCount, File file) throws IOException {
110
111 if (skipDuplicates) {
112 // TODO refactor this thing into a separate class
113 String signature = file.getName() + '_' + file.length();
114 if (current.contains(signature)) {
115 System.err.println("Skipping " + file.getAbsolutePath()
116 + " since it appears to be a duplicate file and --skip-duplicate-files is set");
117 return;
118 }
119 current.add(signature);
120 }
121
122 if ( !file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
123 System.err.println("Skipping " + file + " since it appears to be a symlink");
124 return;
125 }
126
127 listener.addedFile(fileCount, file);
128 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
129 language.getTokenizer().tokenize(sourceCode, tokens);
130 source.put(sourceCode.getFileName(), sourceCode);
131 }
132
133
134 }