001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.lucene.demo.facet; 018 019import java.io.IOException; 020import java.time.LocalDate; 021import java.time.ZoneOffset; 022import java.util.Arrays; 023import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 024import org.apache.lucene.document.Document; 025import org.apache.lucene.document.Field; 026import org.apache.lucene.document.FloatPoint; 027import org.apache.lucene.document.IntPoint; 028import org.apache.lucene.document.LongPoint; 029import org.apache.lucene.document.StringField; 030import org.apache.lucene.facet.FacetResult; 031import org.apache.lucene.facet.Facets; 032import org.apache.lucene.facet.FacetsCollector; 033import org.apache.lucene.facet.FacetsCollectorManager; 034import org.apache.lucene.facet.facetset.DimRange; 035import org.apache.lucene.facet.facetset.ExactFacetSetMatcher; 036import org.apache.lucene.facet.facetset.FacetSet; 037import org.apache.lucene.facet.facetset.FacetSetDecoder; 038import org.apache.lucene.facet.facetset.FacetSetMatcher; 039import org.apache.lucene.facet.facetset.FacetSetsField; 040import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts; 041import org.apache.lucene.facet.facetset.RangeFacetSetMatcher; 042import org.apache.lucene.index.DirectoryReader; 043import org.apache.lucene.index.IndexWriter; 044import org.apache.lucene.index.IndexWriterConfig; 045import org.apache.lucene.index.IndexWriterConfig.OpenMode; 046import org.apache.lucene.search.BooleanClause; 047import org.apache.lucene.search.BooleanQuery; 048import org.apache.lucene.search.IndexSearcher; 049import org.apache.lucene.search.MatchAllDocsQuery; 050import org.apache.lucene.search.Query; 051import org.apache.lucene.search.TermInSetQuery; 052import org.apache.lucene.store.ByteBuffersDirectory; 053import org.apache.lucene.store.Directory; 054import org.apache.lucene.util.BytesRef; 055import org.apache.lucene.util.NumericUtils; 056 057/** 058 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet} 059 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows 060 * how to mix and match dimensions of different types, as well as implementing a custom {@link 061 * FacetSetMatcher}. 062 */ 063public class CustomFacetSetExample { 064 065 private static final long MAY_SECOND_2022 = date("2022-05-02"); 066 private static final long JUNE_SECOND_2022 = date("2022-06-02"); 067 private static final long JULY_SECOND_2022 = date("2022-07-02"); 068 private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120); 069 private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100); 070 private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80); 071 072 private final Directory indexDir = new ByteBuffersDirectory(); 073 074 /** Empty constructor */ 075 public CustomFacetSetExample() {} 076 077 /** Build the example index. */ 078 private void index() throws IOException { 079 IndexWriter indexWriter = 080 new IndexWriter( 081 indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); 082 083 // Every document holds the temperature measures for a City by Date 084 085 Document doc = new Document(); 086 doc.add(new StringField("city", "city1", Field.Store.YES)); 087 doc.add( 088 FacetSetsField.create( 089 "temperature", 090 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES), 091 new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES), 092 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 093 addFastMatchFields(doc); 094 indexWriter.addDocument(doc); 095 096 doc = new Document(); 097 doc.add(new StringField("city", "city2", Field.Store.YES)); 098 doc.add( 099 FacetSetsField.create( 100 "temperature", 101 new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES), 102 new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES), 103 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 104 addFastMatchFields(doc); 105 indexWriter.addDocument(doc); 106 107 indexWriter.close(); 108 } 109 110 private void addFastMatchFields(Document doc) { 111 // day field 112 doc.add(new StringField("day", String.valueOf(MAY_SECOND_2022), Field.Store.NO)); 113 doc.add(new StringField("day", String.valueOf(JUNE_SECOND_2022), Field.Store.NO)); 114 doc.add(new StringField("day", String.valueOf(JULY_SECOND_2022), Field.Store.NO)); 115 116 // temp field 117 doc.add(new StringField("temp", String.valueOf(EIGHTY_DEGREES), Field.Store.NO)); 118 doc.add(new StringField("temp", String.valueOf(HUNDRED_DEGREES), Field.Store.NO)); 119 doc.add(new StringField("temp", String.valueOf(HUNDRED_TWENTY_DEGREES), Field.Store.NO)); 120 } 121 122 /** Counting documents which exactly match a given {@link FacetSet}. */ 123 private FacetResult exactMatching() throws IOException { 124 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 125 IndexSearcher searcher = new IndexSearcher(indexReader); 126 127 // MatchAllDocsQuery is for "browsing" (counts facets 128 // for all non-deleted docs in the index); normally 129 // you'd use a "normal" query: 130 FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager()); 131 132 // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions 133 Facets facets = 134 new MatchingFacetSetsCounts( 135 "temperature", 136 fc, 137 TemperatureReadingFacetSet::decodeTemperatureReading, 138 new ExactFacetSetMatcher( 139 "May 2022 (100f)", 140 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)), 141 new ExactFacetSetMatcher( 142 "July 2022 (120f)", 143 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 144 145 // Retrieve results 146 return facets.getAllChildren("temperature"); 147 } 148 } 149 150 /** 151 * Counting documents which exactly match a given {@link FacetSet}. This example also demonstrates 152 * how to use a fast match query to improve the counting efficiency by skipping over documents 153 * which cannot possibly match a set. 154 */ 155 private FacetResult exactMatchingWithFastMatchQuery() throws IOException { 156 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 157 IndexSearcher searcher = new IndexSearcher(indexReader); 158 159 // MatchAllDocsQuery is for "browsing" (counts facets 160 // for all non-deleted docs in the index); normally 161 // you'd use a "normal" query: 162 FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager()); 163 164 // Match documents whose "day" field is either "May 2022" or "July 2022" 165 Query dateQuery = 166 new TermInSetQuery( 167 "day", 168 Arrays.asList( 169 new BytesRef(String.valueOf(MAY_SECOND_2022)), 170 new BytesRef(String.valueOf(JULY_SECOND_2022)))); 171 // Match documents whose "temp" field is either "80" or "120" degrees 172 Query temperatureQuery = 173 new TermInSetQuery( 174 "temp", 175 Arrays.asList( 176 new BytesRef(String.valueOf(HUNDRED_DEGREES)), 177 new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES)))); 178 // Documents must match both clauses 179 Query fastMatchQuery = 180 new BooleanQuery.Builder() 181 .add(dateQuery, BooleanClause.Occur.MUST) 182 .add(temperatureQuery, BooleanClause.Occur.MUST) 183 .build(); 184 185 // Count both "May 2022, 100 degrees" and "July 2022, 120 degrees" dimensions 186 Facets facets = 187 new MatchingFacetSetsCounts( 188 "temperature", 189 fc, 190 TemperatureReadingFacetSet::decodeTemperatureReading, 191 fastMatchQuery, 192 new ExactFacetSetMatcher( 193 "May 2022 (100f)", 194 new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES)), 195 new ExactFacetSetMatcher( 196 "July 2022 (120f)", 197 new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES))); 198 199 // Retrieve results 200 return facets.getAllChildren("temperature"); 201 } 202 } 203 /** Counting documents which match a certain degrees value for any date. */ 204 private FacetResult rangeMatching() throws IOException { 205 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 206 IndexSearcher searcher = new IndexSearcher(indexReader); 207 208 // MatchAllDocsQuery is for "browsing" (counts facets 209 // for all non-deleted docs in the index); normally 210 // you'd use a "normal" query: 211 FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager()); 212 213 // Count 80-100 degrees 214 Facets facets = 215 new MatchingFacetSetsCounts( 216 "temperature", 217 fc, 218 TemperatureReadingFacetSet::decodeTemperatureReading, 219 new RangeFacetSetMatcher( 220 "Eighty to Hundred Degrees", 221 DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true), 222 DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true))); 223 224 // Retrieve results 225 return facets.getAllChildren("temperature"); 226 } 227 } 228 229 /** 230 * Like {@link #rangeMatching()}, however this example demonstrates a custom {@link 231 * FacetSetMatcher} which only considers certain dimensions (in this case only the temperature 232 * one). 233 */ 234 private FacetResult customRangeMatching() throws IOException { 235 try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) { 236 IndexSearcher searcher = new IndexSearcher(indexReader); 237 238 // MatchAllDocsQuery is for "browsing" (counts facets 239 // for all non-deleted docs in the index); normally 240 // you'd use a "normal" query: 241 FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager()); 242 243 // Count 80-100 degrees 244 Facets facets = 245 new MatchingFacetSetsCounts( 246 "temperature", 247 fc, 248 TemperatureReadingFacetSet::decodeTemperatureReading, 249 new TemperatureOnlyFacetSetMatcher( 250 "Eighty to Hundred Degrees", 251 DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true))); 252 253 // Retrieve results 254 return facets.getAllChildren("temperature"); 255 } 256 } 257 258 private static long date(String dateString) { 259 return LocalDate.parse(dateString).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); 260 } 261 262 private static float fahrenheitToCelsius(int degrees) { 263 return (degrees - 32.0f) * 5.f / 9.f; 264 } 265 266 /** Runs the exact matching example. */ 267 public FacetResult runExactMatching() throws IOException { 268 index(); 269 return exactMatching(); 270 } 271 272 /** Runs the exact matching with fast match query example. */ 273 public FacetResult runExactMatchingWithFastMatchQuery() throws IOException { 274 index(); 275 return exactMatchingWithFastMatchQuery(); 276 } 277 278 /** Runs the range matching example. */ 279 public FacetResult runRangeMatching() throws IOException { 280 index(); 281 return rangeMatching(); 282 } 283 284 /** Runs the custom range matching example. */ 285 public FacetResult runCustomRangeMatching() throws IOException { 286 index(); 287 return customRangeMatching(); 288 } 289 290 /** Runs the search and drill-down examples and prints the results. */ 291 public static void main(String[] args) throws Exception { 292 CustomFacetSetExample example = new CustomFacetSetExample(); 293 294 System.out.println("Exact Facet Set matching example:"); 295 System.out.println("-----------------------"); 296 FacetResult result = example.runExactMatching(); 297 System.out.println("Temperature Reading: " + result); 298 299 System.out.println("Exact Facet Set matching with fast match query example:"); 300 System.out.println("-----------------------"); 301 result = example.runExactMatchingWithFastMatchQuery(); 302 System.out.println("Temperature Reading: " + result); 303 304 System.out.println("Range Facet Set matching example:"); 305 System.out.println("-----------------------"); 306 result = example.runRangeMatching(); 307 System.out.println("Temperature Reading: " + result); 308 309 System.out.println("Custom Range Facet Set matching example:"); 310 System.out.println("-----------------------"); 311 result = example.runCustomRangeMatching(); 312 System.out.println("Temperature Reading: " + result); 313 } 314 315 /** 316 * A {@link FacetSet} which encodes a temperature reading in a date (long) and degrees (celsius; 317 * float). 318 */ 319 public static class TemperatureReadingFacetSet extends FacetSet { 320 321 private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES; 322 323 private final long date; 324 private final float degrees; 325 326 /** Constructor */ 327 public TemperatureReadingFacetSet(long date, float degrees) { 328 super(2); // We encode two dimensions 329 330 this.date = date; 331 this.degrees = degrees; 332 } 333 334 @Override 335 public long[] getComparableValues() { 336 return new long[] {date, NumericUtils.floatToSortableInt(degrees)}; 337 } 338 339 @Override 340 public int packValues(byte[] buf, int start) { 341 LongPoint.encodeDimension(date, buf, start); 342 // Encode 'degrees' as a sortable integer. 343 FloatPoint.encodeDimension(degrees, buf, start + Long.BYTES); 344 return sizePackedBytes(); 345 } 346 347 @Override 348 public int sizePackedBytes() { 349 return SIZE_PACKED_BYTES; 350 } 351 352 /** 353 * An implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])} for {@link 354 * TemperatureReadingFacetSet}. 355 */ 356 public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) { 357 dest[0] = LongPoint.decodeDimension(bytesRef.bytes, start); 358 // Decode the degrees as a sortable integer. 359 dest[1] = IntPoint.decodeDimension(bytesRef.bytes, start + Long.BYTES); 360 return SIZE_PACKED_BYTES; 361 } 362 } 363 364 /** 365 * A {@link FacetSetMatcher} which matches facet sets only by their temperature dimension, 366 * ignoring the date. 367 */ 368 public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher { 369 370 private final DimRange temperatureRange; 371 372 /** Constructor */ 373 protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) { 374 super(label, 1); // We only evaluate one dimension 375 376 this.temperatureRange = temperatureRange; 377 } 378 379 @Override 380 public boolean matches(long[] dimValues) { 381 return temperatureRange.min <= dimValues[1] && temperatureRange.max >= dimValues[1]; 382 } 383 } 384}