001 /** 002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 003 */ 004 package org.sonar.duplications.cpd; 005 006 import net.sourceforge.pmd.cpd.*; 007 008 import java.util.*; 009 010 public class MatchAlgorithm { 011 012 private final static int MOD = 37; 013 private int lastHash; 014 private int lastMod = 1; 015 016 private List<Match> matches; 017 private Map<String, SourceCode> source; 018 private Tokens tokens; 019 private List<TokenEntry> code; 020 private CPDListener cpdListener; 021 private int min; 022 private boolean loadSourceCodeSlices = true; 023 024 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) { 025 this(sourceCode, tokens, min, new CPDNullListener()); 026 } 027 028 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) { 029 this.source = sourceCode; 030 this.tokens = tokens; 031 this.code = tokens.getTokens(); 032 this.min = min; 033 this.cpdListener = listener; 034 for (int i = 0; i < min; i++) { 035 lastMod *= MOD; 036 } 037 } 038 039 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { 040 this.loadSourceCodeSlices = loadSourceCodeSlices; 041 } 042 043 public void setListener(CPDListener listener) { 044 this.cpdListener = listener; 045 } 046 047 public Iterator<Match> matches() { 048 return matches.iterator(); 049 } 050 051 public TokenEntry tokenAt(int offset, TokenEntry m) { 052 return code.get(offset + m.getIndex()); 053 } 054 055 public int getMinimumTileSize() { 056 return this.min; 057 } 058 059 public void findMatches() { 060 cpdListener.phaseUpdate(CPDListener.HASH); 061 Map<TokenEntry, Object> markGroups = hash(); 062 063 cpdListener.phaseUpdate(CPDListener.MATCH); 064 MatchCollector matchCollector = new MatchCollector(this); 065 for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) { 066 Object o = i.next(); 067 if (o instanceof List) { 068 List<TokenEntry> l = (List<TokenEntry>) o; 069 070 Collections.reverse(l); 071 matchCollector.collect(l); 072 } 073 i.remove(); 074 } 075 cpdListener.phaseUpdate(CPDListener.GROUPING); 076 matches = matchCollector.getMatches(); 077 matchCollector = null; 078 for (Match match : matches) { 079 for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) { 080 TokenEntry mark = occurrences.next(); 081 match.setLineCount(tokens.getLineCount(mark, match)); 082 if (loadSourceCodeSlices && !occurrences.hasNext()) { 083 int start = mark.getBeginLine(); 084 int end = start + match.getLineCount() - 1; 085 SourceCode sourceCode = source.get(mark.getTokenSrcID()); 086 match.setSourceCodeSlice(sourceCode.getSlice(start, end)); 087 } 088 } 089 } 090 cpdListener.phaseUpdate(CPDListener.DONE); 091 } 092 093 @SuppressWarnings("PMD.JumbledIncrementer") 094 private Map<TokenEntry, Object> hash() { 095 Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size()); 096 for (int i = code.size() - 1; i >= 0; i--) { 097 TokenEntry token = code.get(i); 098 if (token != TokenEntry.EOF) { 099 int last = tokenAt(min, token).getIdentifier(); 100 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last; 101 token.setHashCode(lastHash); 102 Object o = markGroups.get(token); 103 104 // Note that this insertion method is worthwhile since the vast majority 105 // markGroup keys will have only one value. 106 if (o == null) { 107 markGroups.put(token, token); 108 } else if (o instanceof TokenEntry) { 109 List<TokenEntry> l = new ArrayList<TokenEntry>(); 110 l.add((TokenEntry) o); 111 l.add(token); 112 markGroups.put(token, l); 113 } else { 114 List<TokenEntry> l = (List<TokenEntry>) o; 115 l.add(token); 116 } 117 } else { 118 lastHash = 0; 119 for (int end = Math.max(0, i - min + 1); i > end; i--) { 120 token = code.get(i - 1); 121 lastHash = MOD * lastHash + token.getIdentifier(); 122 if (token == TokenEntry.EOF) { 123 break; 124 } 125 } 126 } 127 } 128 return markGroups; 129 } 130 131 }