001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020
021 /**
022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023 */
024 package org.sonar.duplications.cpd;
025
026 import net.sourceforge.pmd.cpd.*;
027
028 import java.util.*;
029
030 public class MatchAlgorithm {
031
032 private static final int MOD = 37;
033 private int lastHash;
034 private int lastMod = 1;
035
036 private List<Match> matches;
037 private Map<String, SourceCode> source;
038 private Tokens tokens;
039 private List<TokenEntry> code;
040 private CPDListener cpdListener;
041 private int min;
042 private boolean loadSourceCodeSlices = true;
043
044 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
045 this(sourceCode, tokens, min, new CPDNullListener());
046 }
047
048 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
049 this.source = sourceCode;
050 this.tokens = tokens;
051 this.code = tokens.getTokens();
052 this.min = min;
053 this.cpdListener = listener;
054 for (int i = 0; i < min; i++) {
055 lastMod *= MOD;
056 }
057 }
058
059 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
060 this.loadSourceCodeSlices = loadSourceCodeSlices;
061 }
062
063 public void setListener(CPDListener listener) {
064 this.cpdListener = listener;
065 }
066
067 public Iterator<Match> matches() {
068 return matches.iterator();
069 }
070
071 public TokenEntry tokenAt(int offset, TokenEntry m) {
072 return code.get(offset + m.getIndex());
073 }
074
075 public int getMinimumTileSize() {
076 return this.min;
077 }
078
079 public void findMatches() {
080 cpdListener.phaseUpdate(CPDListener.HASH);
081 Map<TokenEntry, Object> markGroups = hash();
082
083 cpdListener.phaseUpdate(CPDListener.MATCH);
084 MatchCollector matchCollector = new MatchCollector(this);
085 for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
086 Object o = i.next();
087 if (o instanceof List) {
088 List<TokenEntry> l = (List<TokenEntry>) o;
089
090 Collections.reverse(l);
091 matchCollector.collect(l);
092 }
093 i.remove();
094 }
095 cpdListener.phaseUpdate(CPDListener.GROUPING);
096 matches = matchCollector.getMatches();
097
098 for (Match match : matches) {
099 for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
100 TokenEntry mark = occurrences.next();
101 match.setLineCount(tokens.getLineCount(mark, match));
102 if (loadSourceCodeSlices && !occurrences.hasNext()) {
103 int start = mark.getBeginLine();
104 int end = start + match.getLineCount() - 1;
105 SourceCode sourceCode = source.get(mark.getTokenSrcID());
106 match.setSourceCodeSlice(sourceCode.getSlice(start, end));
107 }
108 }
109 }
110 cpdListener.phaseUpdate(CPDListener.DONE);
111 }
112
113 private Map<TokenEntry, Object> hash() {
114 Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
115 for (int i = code.size() - 1; i >= 0; i--) {
116 TokenEntry token = code.get(i);
117 if (token != TokenEntry.EOF) {
118 int last = tokenAt(min, token).getIdentifier();
119 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
120 token.setHashCode(lastHash);
121 Object o = markGroups.get(token);
122
123 // Note that this insertion method is worthwhile since the vast majority
124 // markGroup keys will have only one value.
125 if (o == null) {
126 markGroups.put(token, token);
127 } else if (o instanceof TokenEntry) {
128 List<TokenEntry> l = new ArrayList<TokenEntry>();
129 l.add((TokenEntry) o);
130 l.add(token);
131 markGroups.put(token, l);
132 } else {
133 List<TokenEntry> l = (List<TokenEntry>) o;
134 l.add(token);
135 }
136 } else {
137 lastHash = 0;
138 for (int end = Math.max(0, i - min + 1); i > end; i--) {
139 token = code.get(i - 1);
140 lastHash = MOD * lastHash + token.getIdentifier();
141 if (token == TokenEntry.EOF) {
142 break;
143 }
144 }
145 }
146 }
147 return markGroups;
148 }
149
150 }