/* * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using IndexReader = Lucene.Net.Index.IndexReader; using Term = Lucene.Net.Index.Term; using PriorityQueue = Lucene.Net.Util.PriorityQueue; namespace Lucene.Net.Search { /// Implements the fuzzy search query. The similiarity measurement /// is based on the Levenshtein (edit distance) algorithm. /// [Serializable] public sealed class FuzzyQuery:MultiTermQuery { public const float defaultMinSimilarity = 0.5f; public const int defaultPrefixLength = 0; private float minimumSimilarity; private int prefixLength; /// Create a new FuzzyQuery that will match terms with a similarity /// of at least minimumSimilarity to term. /// If a prefixLength > 0 is specified, a common prefix /// of that length is also required. /// /// /// the term to search for /// /// a value between 0 and 1 to set the required similarity /// between the query term and the matching terms. For example, for a /// minimumSimilarity of 0.5 a term of the same length /// as the query term is considered similar to the query term if the edit distance /// between both terms is less than length(term)*0.5 /// /// length of common (non-fuzzy) prefix /// /// IllegalArgumentException if minimumSimilarity is >= 1 or < 0 /// or if prefixLength < 0 /// public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term) { if (minimumSimilarity >= 1.0f) throw new System.ArgumentException("minimumSimilarity >= 1"); else if (minimumSimilarity < 0.0f) throw new System.ArgumentException("minimumSimilarity < 0"); if (prefixLength < 0) throw new System.ArgumentException("prefixLength < 0"); this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; } /// Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength) { } /// Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength) { } /// Returns the minimum similarity that is required for this query to match. /// float value between 0.0 and 1.0 /// public float GetMinSimilarity() { return minimumSimilarity; } /// Returns the non-fuzzy prefix length. This is the number of characters at the start /// of a term that must be identical (not fuzzy) to the query term if the query /// is to match that term. /// public int GetPrefixLength() { return prefixLength; } protected internal override FilteredTermEnum GetEnum(IndexReader reader) { return new FuzzyTermEnum(reader, GetTerm(), minimumSimilarity, prefixLength); } public override Query Rewrite(IndexReader reader) { FilteredTermEnum enumerator = GetEnum(reader); int maxClauseCount = BooleanQuery.GetMaxClauseCount(); ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); try { do { float minScore = 0.0f; float score = 0.0f; Term t = enumerator.Term(); if (t != null) { score = enumerator.Difference(); // terms come in alphabetical order, therefore if queue is full and score // not bigger than minScore, we can skip if (stQueue.Size() < maxClauseCount || score > minScore) { stQueue.Insert(new ScoreTerm(t, score)); minScore = ((ScoreTerm) stQueue.Top()).score; // maintain minScore } } } while (enumerator.Next()); } finally { enumerator.Close(); } BooleanQuery query = new BooleanQuery(true); int size = stQueue.Size(); for (int i = 0; i < size; i++) { ScoreTerm st = (ScoreTerm) stQueue.Pop(); TermQuery tq = new TermQuery(st.term); // found a match tq.SetBoost(GetBoost() * st.score); // set the boost query.Add(tq, BooleanClause.Occur.SHOULD); // add to query } return query; } public override System.String ToString(System.String field) { return base.ToString(field) + '~' + minimumSimilarity.ToString(); } private class ScoreTerm { public Term term; public float score; public ScoreTerm(Term term, float score) { this.term = term; this.score = score; } } private class ScoreTermQueue:PriorityQueue { public ScoreTermQueue(int size) { Initialize(size); } /* (non-Javadoc) * @see Lucene.Net.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object) */ public override bool LessThan(System.Object a, System.Object b) { ScoreTerm termA = (ScoreTerm) a; ScoreTerm termB = (ScoreTerm) b; if (termA.score == termB.score) return termA.term.CompareTo(termB.term) > 0; else return termA.score < termB.score; } } public override bool Equals(System.Object o) { if (this == o) return true; if (!(o is FuzzyQuery)) return false; if (!base.Equals(o)) return false; FuzzyQuery fuzzyQuery = (FuzzyQuery) o; if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false; if (prefixLength != fuzzyQuery.prefixLength) return false; return true; } public override int GetHashCode() { int result = base.GetHashCode(); result = 29 * result + minimumSimilarity != + 0.0f ? BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0) : 0; result = 29 * result + prefixLength; return result; } } }