001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 021 /** 022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 023 */ 024 package org.sonar.duplications.cpd; 025 026 import net.sourceforge.pmd.cpd.*; 027 028 import java.util.*; 029 030 public class MatchAlgorithm { 031 032 private static final int MOD = 37; 033 private int lastHash; 034 private int lastMod = 1; 035 036 private List<Match> matches; 037 private Map<String, SourceCode> source; 038 private Tokens tokens; 039 private List<TokenEntry> code; 040 private CPDListener cpdListener; 041 private int min; 042 private boolean loadSourceCodeSlices = true; 043 044 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) { 045 this(sourceCode, tokens, min, new CPDNullListener()); 046 } 047 048 public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) { 049 this.source = sourceCode; 050 this.tokens = tokens; 051 this.code = tokens.getTokens(); 052 this.min = min; 053 this.cpdListener = listener; 054 for (int i = 0; i < min; i++) { 055 lastMod *= MOD; 056 } 057 } 058 059 public void setLoadSourceCodeSlices(boolean loadSourceCodeSlices) { 060 this.loadSourceCodeSlices = loadSourceCodeSlices; 061 } 062 063 public void setListener(CPDListener listener) { 064 this.cpdListener = listener; 065 } 066 067 public Iterator<Match> matches() { 068 return matches.iterator(); 069 } 070 071 public TokenEntry tokenAt(int offset, TokenEntry m) { 072 return code.get(offset + m.getIndex()); 073 } 074 075 public int getMinimumTileSize() { 076 return this.min; 077 } 078 079 public void findMatches() { 080 cpdListener.phaseUpdate(CPDListener.HASH); 081 Map<TokenEntry, Object> markGroups = hash(); 082 083 cpdListener.phaseUpdate(CPDListener.MATCH); 084 MatchCollector matchCollector = new MatchCollector(this); 085 for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) { 086 Object o = i.next(); 087 if (o instanceof List) { 088 List<TokenEntry> l = (List<TokenEntry>) o; 089 090 Collections.reverse(l); 091 matchCollector.collect(l); 092 } 093 i.remove(); 094 } 095 cpdListener.phaseUpdate(CPDListener.GROUPING); 096 matches = matchCollector.getMatches(); 097 098 for (Match match : matches) { 099 for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences.hasNext();) { 100 TokenEntry mark = occurrences.next(); 101 match.setLineCount(tokens.getLineCount(mark, match)); 102 if (loadSourceCodeSlices && !occurrences.hasNext()) { 103 int start = mark.getBeginLine(); 104 int end = start + match.getLineCount() - 1; 105 SourceCode sourceCode = source.get(mark.getTokenSrcID()); 106 match.setSourceCodeSlice(sourceCode.getSlice(start, end)); 107 } 108 } 109 } 110 cpdListener.phaseUpdate(CPDListener.DONE); 111 } 112 113 private Map<TokenEntry, Object> hash() { 114 Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size()); 115 for (int i = code.size() - 1; i >= 0; i--) { 116 TokenEntry token = code.get(i); 117 if (token != TokenEntry.EOF) { 118 int last = tokenAt(min, token).getIdentifier(); 119 lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last; 120 token.setHashCode(lastHash); 121 Object o = markGroups.get(token); 122 123 // Note that this insertion method is worthwhile since the vast majority 124 // markGroup keys will have only one value. 125 if (o == null) { 126 markGroups.put(token, token); 127 } else if (o instanceof TokenEntry) { 128 List<TokenEntry> l = new ArrayList<TokenEntry>(); 129 l.add((TokenEntry) o); 130 l.add(token); 131 markGroups.put(token, l); 132 } else { 133 List<TokenEntry> l = (List<TokenEntry>) o; 134 l.add(token); 135 } 136 } else { 137 lastHash = 0; 138 for (int end = Math.max(0, i - min + 1); i > end; i--) { 139 token = code.get(i - 1); 140 lastHash = MOD * lastHash + token.getIdentifier(); 141 if (token == TokenEntry.EOF) { 142 break; 143 } 144 } 145 } 146 } 147 return markGroups; 148 } 149 150 }