001 /**
002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003 */
004 package org.sonar.duplications.cpd;
005
006 import net.sourceforge.pmd.cpd.*;
007
008 import java.util.*;
009
010 public class MatchAlgorithm {
011
012 private final static int MOD = 37;
013 private int lastHash;
014 private int lastMod = 1;
015
016 private List<Match> matches;
017 private Map<String, SourceCode> source;
018 private Tokens tokens;
019 private List<TokenEntry> code;
020 private CPDListener cpdListener;
021 private int min;
022 private boolean loadSourceCodeSlices = true;
023
024 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
025 this(sourceCode, tokens, min, new CPDNullListener());
026 }
027
028 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
029 this.source = sourceCode;
030 this.tokens = tokens;
031 this.code = tokens.getTokens();
032 this.min = min;
033 this.cpdListener = listener;
034 for (int i = 0; i < min; i++) {
035 lastMod *= MOD;
036 }
037 }
038
039 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
040 this.loadSourceCodeSlices = loadSourceCodeSlices;
041 }
042
043 public void setListener(CPDListener listener) {
044 this.cpdListener = listener;
045 }
046
047 public Iterator<Match> matches() {
048 return matches.iterator();
049 }
050
051 public TokenEntry tokenAt(int offset, TokenEntry m) {
052 return code.get(offset + m.getIndex());
053 }
054
055 public int getMinimumTileSize() {
056 return this.min;
057 }
058
059 public void findMatches() {
060 cpdListener.phaseUpdate(CPDListener.HASH);
061 Map<TokenEntry, Object> markGroups = hash();
062
063 cpdListener.phaseUpdate(CPDListener.MATCH);
064 MatchCollector matchCollector = new MatchCollector(this);
065 for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
066 Object o = i.next();
067 if (o instanceof List) {
068 List<TokenEntry> l = (List<TokenEntry>) o;
069
070 Collections.reverse(l);
071 matchCollector.collect(l);
072 }
073 i.remove();
074 }
075 cpdListener.phaseUpdate(CPDListener.GROUPING);
076 matches = matchCollector.getMatches();
077 matchCollector = null;
078 for (Match match : matches) {
079 for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
080 TokenEntry mark = occurrences.next();
081 match.setLineCount(tokens.getLineCount(mark, match));
082 if (loadSourceCodeSlices && !occurrences.hasNext()) {
083 int start = mark.getBeginLine();
084 int end = start + match.getLineCount() - 1;
085 SourceCode sourceCode = source.get(mark.getTokenSrcID());
086 match.setSourceCodeSlice(sourceCode.getSlice(start, end));
087 }
088 }
089 }
090 cpdListener.phaseUpdate(CPDListener.DONE);
091 }
092
093 @SuppressWarnings("PMD.JumbledIncrementer")
094 private Map<TokenEntry, Object> hash() {
095 Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
096 for (int i = code.size() - 1; i >= 0; i--) {
097 TokenEntry token = code.get(i);
098 if (token != TokenEntry.EOF) {
099 int last = tokenAt(min, token).getIdentifier();
100 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
101 token.setHashCode(lastHash);
102 Object o = markGroups.get(token);
103
104 // Note that this insertion method is worthwhile since the vast majority
105 // markGroup keys will have only one value.
106 if (o == null) {
107 markGroups.put(token, token);
108 } else if (o instanceof TokenEntry) {
109 List<TokenEntry> l = new ArrayList<TokenEntry>();
110 l.add((TokenEntry) o);
111 l.add(token);
112 markGroups.put(token, l);
113 } else {
114 List<TokenEntry> l = (List<TokenEntry>) o;
115 l.add(token);
116 }
117 } else {
118 lastHash = 0;
119 for (int end = Math.max(0, i - min + 1); i > end; i--) {
120 token = code.get(i - 1);
121 lastHash = MOD * lastHash + token.getIdentifier();
122 if (token == TokenEntry.EOF) {
123 break;
124 }
125 }
126 }
127 }
128 return markGroups;
129 }
130
131 }