001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    
021    /**
022     * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023     */
024    package org.sonar.duplications.cpd;
025    
026    import net.sourceforge.pmd.cpd.*;
027    
028    import java.util.*;
029    
030    public class MatchAlgorithm {
031    
032      private static final int MOD = 37;
033      private int lastHash;
034      private int lastMod = 1;
035    
036      private List<Match> matches;
037      private Map<String, SourceCode> source;
038      private Tokens tokens;
039      private List<TokenEntry> code;
040      private CPDListener cpdListener;
041      private int min;
042      private boolean loadSourceCodeSlices = true;
043    
044      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
045        this(sourceCode, tokens, min, new CPDNullListener());
046      }
047    
048      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
049        this.source = sourceCode;
050        this.tokens = tokens;
051        this.code = tokens.getTokens();
052        this.min = min;
053        this.cpdListener = listener;
054        for (int i = 0; i < min; i++) {
055          lastMod *= MOD;
056        }
057      }
058    
059      public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) {
060        this.loadSourceCodeSlices = loadSourceCodeSlices;
061      }
062    
063      public void setListener(CPDListener listener) {
064        this.cpdListener = listener;
065      }
066    
067      public Iterator<Match> matches() {
068        return matches.iterator();
069      }
070    
071      public TokenEntry tokenAt(int offset, TokenEntry m) {
072        return code.get(offset + m.getIndex());
073      }
074    
075      public int getMinimumTileSize() {
076        return this.min;
077      }
078    
079      public void findMatches() {
080        cpdListener.phaseUpdate(CPDListener.HASH);
081        Map<TokenEntry, Object> markGroups = hash();
082    
083        cpdListener.phaseUpdate(CPDListener.MATCH);
084        MatchCollector matchCollector = new MatchCollector(this);
085        for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
086          Object o = i.next();
087          if (o instanceof List) {
088            List<TokenEntry> l = (List<TokenEntry>) o;
089    
090            Collections.reverse(l);
091            matchCollector.collect(l);
092          }
093          i.remove();
094        }
095        cpdListener.phaseUpdate(CPDListener.GROUPING);
096        matches = matchCollector.getMatches();
097    
098        for (Match match : matches) {
099          for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) {
100            TokenEntry mark = occurrences.next();
101            match.setLineCount(tokens.getLineCount(mark, match));
102            if (loadSourceCodeSlices && !occurrences.hasNext()) {
103              int start = mark.getBeginLine();
104              int end = start + match.getLineCount() - 1;
105              SourceCode sourceCode = source.get(mark.getTokenSrcID());
106              match.setSourceCodeSlice(sourceCode.getSlice(start, end));
107            }
108          }
109        }
110        cpdListener.phaseUpdate(CPDListener.DONE);
111      }
112    
113      private Map<TokenEntry, Object> hash() {
114        Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
115        for (int i = code.size() - 1; i >= 0; i--) {
116          TokenEntry token = code.get(i);
117          if (token != TokenEntry.EOF) {
118            int last = tokenAt(min, token).getIdentifier();
119            lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
120            token.setHashCode(lastHash);
121            Object o = markGroups.get(token);
122    
123            // Note that this insertion method is worthwhile since the vast majority
124            // markGroup keys will have only one value.
125            if (o == null) {
126              markGroups.put(token, token);
127            } else if (o instanceof TokenEntry) {
128              List<TokenEntry> l = new ArrayList<TokenEntry>();
129              l.add((TokenEntry) o);
130              l.add(token);
131              markGroups.put(token, l);
132            } else {
133              List<TokenEntry> l = (List<TokenEntry>) o;
134              l.add(token);
135            }
136          } else {
137            lastHash = 0;
138            for (int end = Math.max(0, i - min + 1); i > end; i--) {
139              token = code.get(i - 1);
140              lastHash = MOD * lastHash + token.getIdentifier();
141              if (token == TokenEntry.EOF) {
142                break;
143              }
144            }
145          }
146        }
147        return markGroups;
148      }
149    
150    }