001    /**
002     * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003     */
004    package org.sonar.duplications.cpd;
005    
006    import net.sourceforge.pmd.cpd.*;
007    
008    import java.util.*;
009    
010    public class MatchAlgorithm {
011    
012      private final static int MOD = 37;
013      private int lastHash;
014      private int lastMod = 1;
015    
016      private List<Match> matches;
017      private Map<String, SourceCode> source;
018      private Tokens tokens;
019      private List<TokenEntry> code;
020      private CPDListener cpdListener;
021      private int min;
022      private boolean loadSourceCodeSlices = true;
023    
024      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
025        this(sourceCode, tokens, min, new CPDNullListener());
026      }
027    
028      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
029        this.source = sourceCode;
030        this.tokens = tokens;
031        this.code = tokens.getTokens();
032        this.min = min;
033        this.cpdListener = listener;
034        for (int i = 0; i < min; i++) {
035          lastMod *= MOD;
036        }
037      }
038    
039      public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
040        this.loadSourceCodeSlices = loadSourceCodeSlices;
041      }
042    
043      public void setListener(CPDListener listener) {
044        this.cpdListener = listener;
045      }
046    
047      public Iterator<Match> matches() {
048        return matches.iterator();
049      }
050    
051      public TokenEntry tokenAt(int offset, TokenEntry m) {
052        return code.get(offset + m.getIndex());
053      }
054    
055      public int getMinimumTileSize() {
056        return this.min;
057      }
058    
059      public void findMatches() {
060        cpdListener.phaseUpdate(CPDListener.HASH);
061        Map<TokenEntry, Object> markGroups = hash();
062    
063        cpdListener.phaseUpdate(CPDListener.MATCH);
064        MatchCollector matchCollector = new MatchCollector(this);
065        for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
066          Object o = i.next();
067          if (o instanceof List) {
068            List<TokenEntry> l = (List<TokenEntry>) o;
069    
070            Collections.reverse(l);
071            matchCollector.collect(l);
072          }
073          i.remove();
074        }
075        cpdListener.phaseUpdate(CPDListener.GROUPING);
076        matches = matchCollector.getMatches();
077        matchCollector = null;
078        for (Match match : matches) {
079          for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
080            TokenEntry mark = occurrences.next();
081            match.setLineCount(tokens.getLineCount(mark, match));
082            if (loadSourceCodeSlices && !occurrences.hasNext()) {
083              int start = mark.getBeginLine();
084              int end = start + match.getLineCount() - 1;
085              SourceCode sourceCode = source.get(mark.getTokenSrcID());
086              match.setSourceCodeSlice(sourceCode.getSlice(start, end));
087            }
088          }
089        }
090        cpdListener.phaseUpdate(CPDListener.DONE);
091      }
092    
093      @SuppressWarnings("PMD.JumbledIncrementer")
094      private Map<TokenEntry, Object> hash() {
095        Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
096        for (int i = code.size() - 1; i >= 0; i--) {
097          TokenEntry token = code.get(i);
098          if (token != TokenEntry.EOF) {
099            int last = tokenAt(min, token).getIdentifier();
100            lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
101            token.setHashCode(lastHash);
102            Object o = markGroups.get(token);
103    
104            // Note that this insertion method is worthwhile since the vast majority
105            // markGroup keys will have only one value.
106            if (o == null) {
107              markGroups.put(token, token);
108            } else if (o instanceof TokenEntry) {
109              List<TokenEntry> l = new ArrayList<TokenEntry>();
110              l.add((TokenEntry) o);
111              l.add(token);
112              markGroups.put(token, l);
113            } else {
114              List<TokenEntry> l = (List<TokenEntry>) o;
115              l.add(token);
116            }
117          } else {
118            lastHash = 0;
119            for (int end = Math.max(0, i - min + 1); i > end; i--) {
120              token = code.get(i - 1);
121              lastHash = MOD * lastHash + token.getIdentifier();
122              if (token == TokenEntry.EOF) {
123                break;
124              }
125            }
126          }
127        }
128        return markGroups;
129      }
130    
131    }