001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 031import org.opencms.configuration.CmsConfigurationException; 032import org.opencms.db.CmsDriverManager; 033import org.opencms.db.CmsPublishedResource; 034import org.opencms.db.CmsResourceState; 035import org.opencms.file.CmsObject; 036import org.opencms.file.CmsProject; 037import org.opencms.file.CmsResource; 038import org.opencms.file.CmsResourceFilter; 039import org.opencms.file.CmsUser; 040import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 041import org.opencms.file.types.CmsResourceTypeXmlContent; 042import org.opencms.file.types.I_CmsResourceType; 043import org.opencms.i18n.CmsLocaleManager; 044import org.opencms.i18n.CmsMessageContainer; 045import org.opencms.loader.CmsLoaderException; 046import org.opencms.main.CmsBroadcast.ContentMode; 047import org.opencms.main.CmsEvent; 048import org.opencms.main.CmsException; 049import org.opencms.main.CmsIllegalArgumentException; 050import org.opencms.main.CmsIllegalStateException; 051import org.opencms.main.CmsLog; 052import org.opencms.main.I_CmsEventListener; 053import org.opencms.main.OpenCms; 054import org.opencms.main.OpenCmsSolrHandler; 055import org.opencms.relations.CmsRelation; 056import org.opencms.relations.CmsRelationFilter; 057import org.opencms.relations.CmsRelationType; 058import org.opencms.report.CmsLogReport; 059import org.opencms.report.CmsShellLogReport; 060import org.opencms.report.I_CmsReport; 061import org.opencms.scheduler.I_CmsScheduledJob; 062import org.opencms.search.documents.A_CmsVfsDocument; 063import org.opencms.search.documents.CmsExtractionResultCache; 064import org.opencms.search.documents.I_CmsDocumentFactory; 065import org.opencms.search.documents.I_CmsTermHighlighter; 066import org.opencms.search.fields.CmsLuceneField; 067import org.opencms.search.fields.CmsLuceneFieldConfiguration; 068import org.opencms.search.fields.CmsSearchField; 069import org.opencms.search.fields.CmsSearchFieldConfiguration; 070import org.opencms.search.fields.CmsSearchFieldMapping; 071import org.opencms.search.fields.I_CmsSearchFieldConfiguration; 072import org.opencms.search.solr.CmsSolrConfiguration; 073import org.opencms.search.solr.CmsSolrFieldConfiguration; 074import org.opencms.search.solr.CmsSolrIndex; 075import org.opencms.search.solr.I_CmsSolrIndexWriter; 076import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 077import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer; 078import org.opencms.security.CmsRole; 079import org.opencms.security.CmsRoleViolationException; 080import org.opencms.util.A_CmsModeStringEnumeration; 081import org.opencms.util.CmsFileUtil; 082import org.opencms.util.CmsStringUtil; 083import org.opencms.util.CmsUUID; 084import org.opencms.util.CmsWaitHandle; 085 086import java.io.File; 087import java.io.IOException; 088import java.nio.file.FileSystems; 089import java.nio.file.Paths; 090import java.util.ArrayList; 091import java.util.Collection; 092import java.util.Collections; 093import java.util.HashMap; 094import java.util.HashSet; 095import java.util.Iterator; 096import java.util.LinkedHashMap; 097import java.util.List; 098import java.util.ListIterator; 099import java.util.Locale; 100import java.util.Map; 101import java.util.Set; 102import java.util.TreeMap; 103import java.util.concurrent.locks.ReentrantLock; 104import java.util.stream.Collectors; 105 106import org.apache.commons.logging.Log; 107import org.apache.lucene.analysis.Analyzer; 108import org.apache.lucene.analysis.CharArraySet; 109import org.apache.lucene.analysis.standard.StandardAnalyzer; 110import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 111import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder; 112import org.apache.solr.core.CoreContainer; 113import org.apache.solr.core.CoreDescriptor; 114import org.apache.solr.core.SolrCore; 115 116/** 117 * Implements the general management and configuration of the search and 118 * indexing facilities in OpenCms.<p> 119 * 120 * @since 6.0.0 121 */ 122public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 123 124 /** 125 * Enumeration class for force unlock types.<p> 126 */ 127 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 128 129 /** Force unlock type "always". */ 130 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 131 132 /** Force unlock type "never". */ 133 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 134 135 /** Force unlock type "only full". */ 136 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 137 138 /** Serializable version id. */ 139 private static final long serialVersionUID = 74746076708908673L; 140 141 /** 142 * Creates a new force unlock type with the given name.<p> 143 * 144 * @param mode the mode id to use 145 */ 146 protected CmsSearchForceUnlockMode(String mode) { 147 148 super(mode); 149 } 150 151 /** 152 * Returns the lock type for the given type value.<p> 153 * 154 * @param type the type value to get the lock type for 155 * 156 * @return the lock type for the given type value 157 */ 158 public static CmsSearchForceUnlockMode valueOf(String type) { 159 160 if (type.equals(ALWAYS.toString())) { 161 return ALWAYS; 162 } else if (type.equals(NEVER.toString())) { 163 return NEVER; 164 } else { 165 return ONLYFULL; 166 } 167 } 168 } 169 170 /** 171 * Handles offline index generation.<p> 172 */ 173 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 174 175 /** Indicates if the event handlers for the offline search have been already registered. */ 176 private boolean m_isEventRegistered; 177 178 /** The list of resources to index. */ 179 private List<CmsPublishedResource> m_resourcesToIndex; 180 181 /** 182 * Initializes the offline index handler.<p> 183 */ 184 protected CmsSearchOfflineHandler() { 185 186 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 187 } 188 189 /** 190 * Implements the event listener of this class.<p> 191 * 192 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 193 */ 194 @SuppressWarnings("unchecked") 195 public void cmsEvent(CmsEvent event) { 196 197 switch (event.getType()) { 198 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 199 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 200 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 201 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 202 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 203 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 204 // skip lock & unlock 205 return; 206 } 207 // skip indexing if flag is set in event 208 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 209 if (skip != null) { 210 return; 211 } 212 213 // a resource has been modified - offline indexes require (re)indexing 214 List<CmsResource> resources = Collections.singletonList( 215 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 216 reIndexResources(resources); 217 break; 218 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 219 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 220 I_CmsEventListener.KEY_RESOURCES); 221 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 222 for (CmsResource res : resourcesToDelete) { 223 if (res.getState().isNew()) { 224 // if the resource is new and a delete action was performed 225 // --> set the state of the resource to deleted 226 res.setState(CmsResourceState.STATE_DELETED); 227 } 228 } 229 reIndexResources(resourcesToDelete); 230 break; 231 case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 232 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 233 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 234 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 235 // a list of resources has been modified - offline indexes require (re)indexing 236 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 237 break; 238 default: 239 // no operation 240 } 241 } 242 243 /** 244 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 245 * 246 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 247 */ 248 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 249 250 m_resourcesToIndex.addAll(resourcesToIndex); 251 } 252 253 /** 254 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 255 * 256 * @return the resources to index 257 */ 258 protected List<CmsPublishedResource> getResourcesToIndex() { 259 260 List<CmsPublishedResource> result; 261 synchronized (this) { 262 result = m_resourcesToIndex; 263 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 264 } 265 try { 266 CmsObject cms = m_adminCms; 267 CmsProject offline = getOfflineIndexProject(); 268 if (offline != null) { 269 // switch to the offline project if available 270 cms = OpenCms.initCmsObject(m_adminCms); 271 cms.getRequestContext().setCurrentProject(offline); 272 } 273 addAdditionallyAffectedResources(cms, result); 274 } catch (CmsException e) { 275 LOG.error(e.getLocalizedMessage(), e); 276 } 277 return result; 278 } 279 280 /** 281 * Initializes this offline search handler, registering the event handlers if required.<p> 282 */ 283 protected void initialize() { 284 285 if (m_offlineIndexes.size() > 0) { 286 // there is at least one offline index configured 287 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 288 // create the offline indexing thread 289 m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); 290 // start the offline index thread 291 m_offlineIndexThread.start(); 292 } 293 } else { 294 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 295 // no offline indexes but thread still running, stop the thread 296 m_offlineIndexThread.shutDown(); 297 m_offlineIndexThread = null; 298 } 299 } 300 // do this only in case there are offline indexes configured 301 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 302 m_isEventRegistered = true; 303 // register this object as event listener 304 OpenCms.addCmsEventListener( 305 this, 306 new int[] { 307 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 308 I_CmsEventListener.EVENT_RESOURCE_CREATED, 309 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 310 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 311 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 312 I_CmsEventListener.EVENT_RESOURCE_MOVED, 313 I_CmsEventListener.EVENT_RESOURCE_DELETED, 314 I_CmsEventListener.EVENT_RESOURCE_COPIED, 315 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 316 } 317 } 318 319 /** 320 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 321 * 322 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 323 */ 324 protected synchronized void reIndexResources(List<CmsResource> resources) { 325 326 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 327 for (CmsResource res : resources) { 328 CmsPublishedResource pubRes = new CmsPublishedResource(res); 329 resourcesToIndex.add(pubRes); 330 } 331 if (resourcesToIndex.size() > 0) { 332 // add the resources found to the offline index thread 333 addResourcesToIndex(resourcesToIndex); 334 } 335 } 336 } 337 338 /** 339 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 340 */ 341 protected class CmsSearchOfflineIndexThread extends Thread { 342 343 /** The event handler that triggers this thread. */ 344 CmsSearchOfflineHandler m_handler; 345 346 /** Indicates if this thread is still alive. */ 347 boolean m_isAlive; 348 349 /** Indicates that an index update thread is currently running. */ 350 private boolean m_isUpdating; 351 352 /** If true a manual update (after file upload) was triggered. */ 353 private boolean m_updateTriggered; 354 355 /** The wait handle used for signalling when the worker thread has finished. */ 356 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 357 358 /** 359 * Constructor.<p> 360 * 361 * @param handler the offline index event handler 362 */ 363 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 364 365 super("OpenCms: Offline Search Indexer"); 366 m_handler = handler; 367 } 368 369 /** 370 * Gets the wait handle used for signalling when the worker thread has finished. 371 * 372 * @return the wait handle 373 **/ 374 public CmsWaitHandle getWaitHandle() { 375 376 return m_waitHandle; 377 } 378 379 /** 380 * @see java.lang.Thread#interrupt() 381 */ 382 @Override 383 public void interrupt() { 384 385 super.interrupt(); 386 m_updateTriggered = true; 387 } 388 389 /** 390 * @see java.lang.Thread#run() 391 */ 392 @Override 393 public void run() { 394 395 // create a log report for the output 396 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 397 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 398 m_updateTriggered = false; 399 try { 400 while (m_isAlive) { 401 if (!m_updateTriggered) { 402 try { 403 sleep(offlineUpdateFrequency); 404 } catch (InterruptedException e) { 405 // continue the thread after interruption 406 if (!m_isAlive) { 407 // the thread has been shut down while sleeping 408 continue; 409 } 410 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 411 // offline update frequency change - clear interrupt status 412 offlineUpdateFrequency = getOfflineUpdateFrequency(); 413 } 414 LOG.info(e.getLocalizedMessage(), e); 415 } 416 } 417 if (m_isAlive) { 418 // set update trigger to false since we do the update now 419 m_updateTriggered = false; 420 // get list of resource to update 421 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 422 if (resourcesToIndex.size() > 0) { 423 // only start indexing if there is at least one resource 424 startOfflineUpdateThread(report, resourcesToIndex); 425 } else { 426 getWaitHandle().release(); 427 } 428 // this is just called to clear the interrupt status of the thread 429 interrupted(); 430 } 431 } 432 } finally { 433 // make sure that live status is reset in case of Exceptions 434 m_isAlive = false; 435 } 436 437 } 438 439 /** 440 * @see java.lang.Thread#start() 441 */ 442 @Override 443 public synchronized void start() { 444 445 m_isAlive = true; 446 super.start(); 447 } 448 449 /** 450 * Obtains the list of resource to update in the offline index, 451 * then optimizes the list by removing duplicate entries.<p> 452 * 453 * @return the list of resource to update in the offline index 454 */ 455 protected List<CmsPublishedResource> getResourcesToIndex() { 456 457 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 458 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 459 460 // Reverse to always keep the last list entries 461 Collections.reverse(resourcesToIndex); 462 for (CmsPublishedResource pubRes : resourcesToIndex) { 463 boolean addResource = true; 464 for (CmsPublishedResource resRes : result) { 465 if (pubRes.equals(resRes) 466 && (pubRes.getState() == resRes.getState()) 467 && (pubRes.getMovedState() == resRes.getMovedState()) 468 && pubRes.getRootPath().equals(resRes.getRootPath())) { 469 // resource already in the update list 470 addResource = false; 471 break; 472 } 473 } 474 if (addResource) { 475 result.add(pubRes); 476 } 477 478 } 479 Collections.reverse(result); 480 return changeStateOfMoveOriginsToDeleted(result); 481 } 482 483 /** 484 * Shuts down this offline index thread.<p> 485 */ 486 protected void shutDown() { 487 488 m_isAlive = false; 489 interrupt(); 490 if (m_isUpdating) { 491 long waitTime = getOfflineUpdateFrequency() / 2; 492 int waitSteps = 0; 493 do { 494 try { 495 // wait half the time of the offline index frequency for the thread to finish 496 Thread.sleep(waitTime); 497 } catch (InterruptedException e) { 498 // continue 499 LOG.info(e.getLocalizedMessage(), e); 500 } 501 waitSteps++; 502 // wait 5 times then stop waiting 503 } while ((waitSteps < 5) && m_isUpdating); 504 } 505 } 506 507 /** 508 * Updates the offline search indexes for the given list of resources.<p> 509 * 510 * @param report the report to write the index information to 511 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 512 */ 513 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 514 515 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 516 long startTime = System.currentTimeMillis(); 517 long waitTime = getOfflineUpdateFrequency() / 2; 518 if (LOG.isDebugEnabled()) { 519 LOG.debug( 520 Messages.get().getBundle().key( 521 Messages.LOG_OI_UPDATE_START_1, 522 Integer.valueOf(resourcesToIndex.size()))); 523 } 524 525 m_isUpdating = true; 526 thread.start(); 527 528 do { 529 try { 530 // wait half the time of the offline index frequency for the thread to finish 531 thread.join(waitTime); 532 } catch (InterruptedException e) { 533 // continue 534 LOG.info(e.getLocalizedMessage(), e); 535 } 536 if (thread.isAlive()) { 537 LOG.warn( 538 Messages.get().getBundle().key( 539 Messages.LOG_OI_UPDATE_LONG_2, 540 Integer.valueOf(resourcesToIndex.size()), 541 Long.valueOf(System.currentTimeMillis() - startTime))); 542 } 543 } while (thread.isAlive()); 544 m_isUpdating = false; 545 546 if (LOG.isDebugEnabled()) { 547 LOG.debug( 548 Messages.get().getBundle().key( 549 Messages.LOG_OI_UPDATE_FINISH_2, 550 Integer.valueOf(resourcesToIndex.size()), 551 Long.valueOf(System.currentTimeMillis() - startTime))); 552 } 553 } 554 555 /** 556 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 557 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 558 * 559 * @param resourcesToIndex the resources to index 560 * 561 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 562 */ 563 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 564 List<CmsPublishedResource> resourcesToIndex) { 565 566 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 567 for (CmsPublishedResource resource : resourcesToIndex) { 568 if (resource.getState().isDeleted()) { 569 // we don't want the last path to be from a deleted resource 570 continue; 571 } 572 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 573 } 574 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 575 for (CmsPublishedResource resource : resourcesToIndex) { 576 if (resource.getState().isDeleted()) { 577 result.add(resource); 578 continue; 579 } 580 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 581 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 582 result.add(resource); 583 } else { 584 result.add( 585 new CmsPublishedResource( 586 resource.getStructureId(), 587 resource.getResourceId(), 588 resource.getPublishTag(), 589 resource.getRootPath(), 590 resource.getType(), 591 resource.isFolder(), 592 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 593 resource.getSiblingCount())); 594 } 595 } 596 return result; 597 } 598 } 599 600 /** 601 * An offline index worker Thread runs each time for every offline index update action.<p> 602 * 603 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 604 * problems if a single operation "hangs" the Tread.<p> 605 */ 606 protected class CmsSearchOfflineIndexWorkThread extends Thread { 607 608 /** The report to write the index information to. */ 609 I_CmsReport m_report; 610 611 /** The list of {@link CmsPublishedResource} objects to index. */ 612 List<CmsPublishedResource> m_resourcesToIndex; 613 614 /** 615 * Updates the offline search indexes for the given list of resources.<p> 616 * 617 * @param report the report to write the index information to 618 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 619 */ 620 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 621 622 super("OpenCms: Offline Search Index Worker"); 623 m_report = report; 624 m_resourcesToIndex = resourcesToIndex; 625 } 626 627 /** 628 * @see java.lang.Thread#run() 629 */ 630 @Override 631 public void run() { 632 633 updateIndexOffline(m_report, m_resourcesToIndex); 634 if (m_offlineIndexThread != null) { 635 m_offlineIndexThread.getWaitHandle().release(); 636 } 637 } 638 } 639 640 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 641 private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true); 642 643 /** The default value used for generating search result excerpts (1024 chars). */ 644 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 645 646 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 647 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 648 649 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */ 650 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 651 652 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 653 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 654 655 /** The default maximal wait time for re-indexing after editing a content. */ 656 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 657 658 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 659 public static final int DEFAULT_TIMEOUT = 60000; 660 661 /** Scheduler parameter: Update only a specified list of indexes. */ 662 public static final String JOB_PARAM_INDEXLIST = "indexList"; 663 664 /** Scheduler parameter: Write the output of the update to the logfile. */ 665 public static final String JOB_PARAM_WRITELOG = "writeLog"; 666 667 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 668 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 669 670 /** The log object for this class. */ 671 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 672 673 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 674 protected CmsObject m_adminCms; 675 676 /** The list of indexes that are configured for offline index mode. */ 677 protected List<I_CmsSearchIndex> m_offlineIndexes; 678 679 /** The thread used of offline indexing. */ 680 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 681 682 /** Configured analyzers for languages using <analyzer>. */ 683 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 684 685 /** Stores the offline update frequency while indexing is paused. */ 686 private long m_configuredOfflineIndexingFrequency; 687 688 /** The Solr core container. */ 689 private CoreContainer m_coreContainer; 690 691 /** A map of document factory configurations. */ 692 private List<CmsSearchDocumentType> m_documentTypeConfigs; 693 694 /** A map of document factories keyed first by their name and then by their extraction keys. */ 695 private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes; 696 697 /** The set of all globally available extraction keys for document factories. */ 698 private Set<String> m_extractionKeys; 699 700 /** The max age for extraction results to remain in the cache. */ 701 private float m_extractionCacheMaxAge; 702 703 /** The cache for the extraction results. */ 704 private CmsExtractionResultCache m_extractionResultCache; 705 706 /** Contains the available field configurations. */ 707 private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations; 708 709 /** The force unlock type. */ 710 private CmsSearchForceUnlockMode m_forceUnlockMode; 711 712 /** The class used to highlight the search terms in the excerpt of a search result. */ 713 private I_CmsTermHighlighter m_highlighter; 714 715 /** A list of search indexes. */ 716 private List<I_CmsSearchIndex> m_indexes; 717 718 /** Seconds to wait for an index lock. */ 719 private int m_indexLockMaxWaitSeconds = 10; 720 721 /** Configured index sources. */ 722 private Map<String, CmsSearchIndexSource> m_indexSources; 723 724 /** The max. char. length of the excerpt in the search result. */ 725 private int m_maxExcerptLength; 726 727 /** The maximum number of modifications before a commit in the search index is triggered. */ 728 private int m_maxModificationsBeforeCommit; 729 730 /** The offline index search handler. */ 731 private CmsSearchOfflineHandler m_offlineHandler; 732 733 /** The update frequency of the offline indexer in milliseconds. */ 734 private long m_offlineUpdateFrequency; 735 736 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 737 private long m_maxIndexWaitTime; 738 739 /** Path to index files below WEB-INF/. */ 740 private String m_path; 741 742 /** The Solr configuration. */ 743 private CmsSolrConfiguration m_solrConfig; 744 745 /** Timeout for abandoning indexing thread. */ 746 private long m_timeout; 747 748 /** 749 * Default constructor when called as cron job.<p> 750 */ 751 public CmsSearchManager() { 752 753 m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>(); 754 m_extractionKeys = new HashSet<String>(); 755 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 756 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 757 m_indexes = new ArrayList<I_CmsSearchIndex>(); 758 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 759 m_offlineHandler = new CmsSearchOfflineHandler(); 760 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 761 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 762 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 763 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 764 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 765 766 m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>(); 767 // make sure we have a "standard" field configuration 768 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 769 770 if (CmsLog.INIT.isInfoEnabled()) { 771 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 772 } 773 } 774 775 /** 776 * Returns an analyzer for the given class name.<p> 777 * 778 * @param className the class name of the analyzer 779 * 780 * @return the appropriate lucene analyzer 781 * 782 * @throws Exception if something goes wrong 783 */ 784 public static Analyzer getAnalyzer(String className) throws Exception { 785 786 Analyzer analyzer = null; 787 Class<?> analyzerClass; 788 try { 789 analyzerClass = Class.forName(className); 790 } catch (ClassNotFoundException e) { 791 // allow Lucene standard classes to be written in a short form 792 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 793 } 794 795 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 796 if (StandardAnalyzer.class.equals(analyzerClass)) { 797 // the Lucene standard analyzer is used - but without any stopwords. 798 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 799 } else { 800 analyzer = (Analyzer)analyzerClass.newInstance(); 801 } 802 return analyzer; 803 } 804 805 /** 806 * Returns the Solr index configured with the parameters name. 807 * The parameters must contain a key/value pair with an existing 808 * Solr index, otherwise <code>null</code> is returned.<p> 809 * 810 * @param cms the current context 811 * @param params the parameter map 812 * 813 * @return the best matching Solr index 814 */ 815 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 816 817 String indexName = null; 818 CmsSolrIndex index = null; 819 // try to get the index name from the parameters: 'core' or 'index' 820 if (params != null) { 821 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 822 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 823 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 824 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 825 : null); 826 } 827 if (indexName == null) { 828 // if no parameter is specified try to use the default online/offline indexes by context 829 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 830 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 831 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 832 } 833 // try to get the index 834 index = OpenCms.getSearchManager().getIndexSolr(indexName); 835 if (index == null) { 836 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 837 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 838 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 839 index = solrs.get(0); 840 } 841 } 842 return index; 843 } 844 845 /** 846 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 847 * 848 * @param indexName the name of the index to check 849 * 850 * @return <code>true</code> if the index for the given name is a Lucene index 851 */ 852 public static boolean isLuceneIndex(String indexName) { 853 854 I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 855 return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex)); 856 } 857 858 /** 859 * Adds an analyzer.<p> 860 * 861 * @param analyzer an analyzer 862 */ 863 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 864 865 m_analyzers.put(analyzer.getLocale(), analyzer); 866 867 if (CmsLog.INIT.isInfoEnabled()) { 868 CmsLog.INIT.info( 869 Messages.get().getBundle().key( 870 Messages.INIT_ADD_ANALYZER_2, 871 analyzer.getLocale(), 872 analyzer.getClassName())); 873 } 874 } 875 876 /** 877 * Adds a document type.<p> 878 * 879 * @param documentType a document type 880 */ 881 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 882 883 m_documentTypeConfigs.add(documentType); 884 885 if (CmsLog.INIT.isInfoEnabled()) { 886 CmsLog.INIT.info( 887 Messages.get().getBundle().key( 888 Messages.INIT_SEARCH_DOC_TYPES_2, 889 documentType.getName(), 890 documentType.getClassName())); 891 } 892 } 893 894 /** 895 * Adds a search field configuration to the search manager.<p> 896 * 897 * @param fieldConfiguration the search field configuration to add 898 */ 899 public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) { 900 901 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 902 } 903 904 /** 905 * Adds a search index to the configuration.<p> 906 * 907 * @param searchIndex the search index to add 908 */ 909 public void addSearchIndex(I_CmsSearchIndex searchIndex) { 910 911 if (!searchIndex.isInitialized()) { 912 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 913 try { 914 searchIndex.initialize(); 915 } catch (CmsException e) { 916 // should never happen 917 LOG.error(e.getMessage(), e); 918 } 919 } 920 } 921 922 // name: not null or emtpy and unique 923 String name = searchIndex.getName(); 924 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 925 throw new CmsIllegalArgumentException( 926 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 927 } 928 if (m_indexSources.keySet().contains(name)) { 929 throw new CmsIllegalArgumentException( 930 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 931 } 932 933 m_indexes.add(searchIndex); 934 if (m_adminCms != null) { 935 initOfflineIndexes(); 936 } 937 938 if (CmsLog.INIT.isInfoEnabled()) { 939 CmsLog.INIT.info( 940 Messages.get().getBundle().key( 941 Messages.INIT_ADD_SEARCH_INDEX_2, 942 searchIndex.getName(), 943 searchIndex.getProject())); 944 } 945 } 946 947 /** 948 * Adds a search index source configuration.<p> 949 * 950 * @param searchIndexSource a search index source configuration 951 */ 952 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 953 954 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 955 956 if (CmsLog.INIT.isInfoEnabled()) { 957 CmsLog.INIT.info( 958 Messages.get().getBundle().key( 959 Messages.INIT_SEARCH_INDEX_SOURCE_2, 960 searchIndexSource.getName(), 961 searchIndexSource.getIndexerClassName())); 962 } 963 } 964 965 /** 966 * Implements the event listener of this class.<p> 967 * 968 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 969 */ 970 public void cmsEvent(CmsEvent event) { 971 972 switch (event.getType()) { 973 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 974 List<String> indexNames = null; 975 if ((event.getData() != null) 976 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 977 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 978 indexNames = CmsStringUtil.splitAsList( 979 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 980 ",", 981 true); 982 } 983 try { 984 if (LOG.isDebugEnabled()) { 985 LOG.debug( 986 Messages.get().getBundle().key( 987 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 988 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 989 new Exception()); 990 } 991 if (indexNames == null) { 992 rebuildAllIndexes(getEventReport(event)); 993 } else { 994 rebuildIndexes(indexNames, getEventReport(event)); 995 } 996 } catch (CmsException e) { 997 if (LOG.isErrorEnabled()) { 998 LOG.error( 999 Messages.get().getBundle().key( 1000 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 1001 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1002 e); 1003 } 1004 } 1005 break; 1006 case I_CmsEventListener.EVENT_CLEAR_CACHES: 1007 if (LOG.isDebugEnabled()) { 1008 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 1009 } 1010 break; 1011 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 1012 // event data contains a list of the published resources 1013 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1014 if (LOG.isDebugEnabled()) { 1015 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1016 } 1017 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1018 if (LOG.isDebugEnabled()) { 1019 LOG.debug( 1020 Messages.get().getBundle().key( 1021 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1022 publishHistoryId)); 1023 } 1024 break; 1025 case I_CmsEventListener.EVENT_REINDEX_OFFLINE: 1026 case I_CmsEventListener.EVENT_REINDEX_ONLINE: 1027 boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType(); 1028 Map<String, Object> eventData = event.getData(); 1029 CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID); 1030 CmsUser user = null; 1031 try { 1032 user = m_adminCms.readUser(userId); 1033 } catch (Throwable t) { 1034 // should never happen 1035 } 1036 try { 1037 SEARCH_MANAGER_LOCK.lock(); 1038 if (LOG.isDebugEnabled()) { 1039 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0)); 1040 } 1041 CmsObject cms = m_adminCms; 1042 if (!isOnline) { 1043 OpenCms.initCmsObject(m_adminCms); 1044 cms.getRequestContext().setCurrentProject( 1045 cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID))); 1046 } 1047 @SuppressWarnings("unchecked") 1048 List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES); 1049 I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT); 1050 List<CmsResource> resourcesToIndex = new ArrayList<>(); 1051 for (CmsResource res : resources) { 1052 if (res.isFile()) { 1053 resourcesToIndex.add(res); 1054 } else { 1055 try { 1056 resourcesToIndex.addAll( 1057 cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true)); 1058 } catch (CmsException e) { 1059 LOG.error(e, e); 1060 } 1061 } 1062 } 1063 // we reindex and prevent using cached results 1064 cleanExtractionCache(); 1065 List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map( 1066 res -> new CmsPublishedResource(res)).collect(Collectors.toList()); 1067 if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) { 1068 addAdditionallyAffectedResources(cms, publishedResourcesToIndex); 1069 } 1070 if (isOnline) { 1071 updateAllIndexes( 1072 m_adminCms, 1073 publishedResourcesToIndex, 1074 new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE)); 1075 } else { 1076 updateIndexOffline(report, publishedResourcesToIndex); 1077 } 1078 cms = null; 1079 SEARCH_MANAGER_LOCK.unlock(); 1080 if (null != user) { 1081 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1082 OpenCms.getSessionManager().sendBroadcast( 1083 null, 1084 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0), 1085 user, 1086 ContentMode.html); 1087 } 1088 if (LOG.isDebugEnabled()) { 1089 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0)); 1090 } 1091 1092 } catch (Throwable e) { 1093 if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) { 1094 SEARCH_MANAGER_LOCK.unlock(); 1095 } 1096 if (null != user) { 1097 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1098 OpenCms.getSessionManager().sendBroadcast( 1099 null, 1100 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0), 1101 user, 1102 ContentMode.html); 1103 } 1104 if (LOG.isDebugEnabled()) { 1105 LOG.error( 1106 Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()), 1107 e); 1108 } else if (LOG.isErrorEnabled()) { 1109 LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData())); 1110 } 1111 } 1112 break; 1113 default: 1114 // no operation 1115 } 1116 } 1117 1118 /** 1119 * Returns all Solr index.<p> 1120 * 1121 * @return all Solr indexes 1122 */ 1123 public List<CmsSolrIndex> getAllSolrIndexes() { 1124 1125 List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1126 for (String indexName : getIndexNames()) { 1127 CmsSolrIndex index = getIndexSolr(indexName); 1128 if (index != null) { 1129 result.add(index); 1130 } 1131 } 1132 return result; 1133 } 1134 1135 /** 1136 * Returns an analyzer for the given language.<p> 1137 * 1138 * The analyzer is selected according to the analyzer configuration.<p> 1139 * 1140 * @param locale the locale to get the analyzer for 1141 * @return the appropriate lucene analyzer 1142 * 1143 * @throws CmsSearchException if something goes wrong 1144 */ 1145 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1146 1147 Analyzer analyzer = null; 1148 String className = null; 1149 1150 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1151 if (analyzerConf == null) { 1152 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1153 } 1154 1155 try { 1156 analyzer = getAnalyzer(analyzerConf.getClassName()); 1157 } catch (Exception e) { 1158 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1159 } 1160 1161 return analyzer; 1162 } 1163 1164 /** 1165 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1166 * 1167 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 1168 * 1169 * @return an unmodifiable view of the Analyzers Map 1170 */ 1171 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1172 1173 return Collections.unmodifiableMap(m_analyzers); 1174 } 1175 1176 /** 1177 * Returns the search analyzer for the given locale.<p> 1178 * 1179 * @param locale the locale to get the analyzer for 1180 * 1181 * @return the search analyzer for the given locale 1182 */ 1183 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1184 1185 return m_analyzers.get(locale); 1186 } 1187 1188 /** 1189 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1190 * 1191 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1192 */ 1193 public String getDirectory() { 1194 1195 return m_path; 1196 } 1197 1198 /** 1199 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1200 * 1201 * @return the Solr home directory 1202 */ 1203 public String getDirectorySolr() { 1204 1205 return m_solrConfig != null ? m_solrConfig.getHome() : null; 1206 } 1207 1208 /** 1209 * Returns the document factory configured under the provided name. 1210 * @param docTypeName the name of the document type. 1211 * @return the factory for the provided name. 1212 */ 1213 public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) { 1214 1215 Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName); 1216 if (factoryMap != null) { 1217 Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator(); 1218 if (factoryIt.hasNext()) { 1219 return factoryMap.values().iterator().next(); 1220 } 1221 } 1222 return null; 1223 } 1224 1225 /** 1226 * Returns a document type config.<p> 1227 * 1228 * @param name the name of the document type config 1229 * @return the document type config. 1230 */ 1231 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1232 1233 // this is really used only for the search manager GUI, 1234 // so performance is not an issue and no lookup map is generated 1235 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1236 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1237 if (type.getName().equals(name)) { 1238 return type; 1239 } 1240 } 1241 return null; 1242 } 1243 1244 /** 1245 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1246 * 1247 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1248 */ 1249 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1250 1251 return Collections.unmodifiableList(m_documentTypeConfigs); 1252 } 1253 1254 /** 1255 * Returns the document type keys used to specify the correct document factory. 1256 * 1257 * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys. 1258 * 1259 * @param resource the resource to generate the list of document type keys for. 1260 * @return the document type keys. 1261 */ 1262 public List<String> getDocumentTypeKeys(CmsResource resource) { 1263 1264 // first get the MIME type of the resource 1265 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1266 String resourceType = null; 1267 try { 1268 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1269 } catch (CmsLoaderException e) { 1270 // ignore, unknown resource type, resource can not be indexed 1271 LOG.info(e.getLocalizedMessage(), e); 1272 } 1273 return getDocumentTypeKeys(resourceType, mimeType); 1274 } 1275 1276 /** 1277 * Returns the document type keys used to specify the correct document factory. 1278 * One resource typically has more than one key. The document factories are matched 1279 * in the provided order and the first matching factory is used. 1280 * 1281 * The keys for type name "typename" and mimetype "mimetype" would be a subset of: 1282 * <ul> 1283 * <li><code>typename_mimetype</code></li> 1284 * <li><code>typename</code></li> 1285 * <li>if <code>typename</code> is a sub-type of <code>containerpage</code> 1286 * <ul> 1287 * <li><code>containerpage_mimetype</code></li> 1288 * <li><code>containerpage</code></li> 1289 * </ul> 1290 * </li> 1291 * <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code> 1292 * <ul> 1293 * <li><code>xmlcontent_mimetype</code></li> 1294 * <li><code>xmlcontent</code></li> 1295 * </ul> 1296 * </li> 1297 * <li><code>__unconfigured___mimetype</code></li> 1298 * <li><code>__unconfigured__</code></li> 1299 * <li><code>__all___mimetype</code></li> 1300 * <li><code>__all__</code></li> 1301 * <ul> 1302 * Note that all keys except the "__all__"-keys are only added as long as globally 1303 * there is no matching factory for the key. 1304 * This in particular means that a factory matching "typename" will never be used 1305 * if you have a factory for "typename__mimetype" - even if this is not configured 1306 * for the used index source. Eventually, the content will not be indexed in such cases. 1307 * @param resourceType the resource type to generate the list of document type keys for. 1308 * @param mimeType the mime type to generate the list of document type keys for. 1309 * @return the document type keys. 1310 */ 1311 public List<String> getDocumentTypeKeys(String resourceType, String mimeType) { 1312 1313 List<String> result = new ArrayList<>(8); 1314 if (null != resourceType) { 1315 String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1316 result.add(currentKey); 1317 if (!m_extractionKeys.contains(currentKey)) { 1318 currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null); 1319 result.add(currentKey); 1320 if (!m_extractionKeys.contains(currentKey)) { 1321 boolean hasGlobalMatch = false; 1322 try { 1323 String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName(); 1324 I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType); 1325 if (!resourceType.equals(containerpageTypeName)) { 1326 if (type instanceof CmsResourceTypeXmlContainerPage) { 1327 if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) { 1328 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType); 1329 result.add(currentKey); 1330 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1331 if (!hasGlobalMatch) { 1332 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null); 1333 result.add(currentKey); 1334 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1335 } 1336 } 1337 } 1338 } 1339 String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName(); 1340 if (!resourceType.equals(containerpageTypeName)) { 1341 if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) { 1342 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType); 1343 result.add(currentKey); 1344 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1345 if (!hasGlobalMatch) { 1346 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null); 1347 result.add(currentKey); 1348 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1349 } 1350 } 1351 } 1352 } catch (Throwable t) { 1353 LOG.warn("Could not read type for name \"" + resourceType + "\".", t); 1354 } 1355 if (!hasGlobalMatch) { 1356 result.add( 1357 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType)); 1358 result.add( 1359 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null)); 1360 } 1361 } 1362 } 1363 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType)); 1364 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null)); 1365 } 1366 return result; 1367 1368 } 1369 1370 /** 1371 * Returns the map from document type keys to document factories with all entries for the provided document type names. 1372 * @param documentTypeNames list of document type names to generate the map for. 1373 * @return the map from document type keys to document factories. 1374 */ 1375 public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) { 1376 1377 Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>(); 1378 if (null != documentTypeNames) { 1379 // Iterate the list in reverse order to prefer factories that are added by document types listed earlier. 1380 ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size()); 1381 while (typesIterator.hasPrevious()) { 1382 Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous()); 1383 if (null != factories) { 1384 result.putAll(factories); 1385 } 1386 } 1387 } 1388 return result; 1389 } 1390 1391 /** 1392 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1393 * 1394 * @return the maximum age a text extraction result is kept in the cache (in hours) 1395 */ 1396 public float getExtractionCacheMaxAge() { 1397 1398 return m_extractionCacheMaxAge; 1399 } 1400 1401 /** 1402 * Returns the search field configuration with the given name.<p> 1403 * 1404 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1405 * 1406 * @param name the name to get the search field configuration for 1407 * 1408 * @return the search field configuration with the given name 1409 */ 1410 public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1411 1412 return m_fieldConfigurations.get(name); 1413 } 1414 1415 /** 1416 * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p> 1417 * 1418 * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries 1419 */ 1420 public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() { 1421 1422 List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>( 1423 m_fieldConfigurations.values()); 1424 Collections.sort(result); 1425 return Collections.unmodifiableList(result); 1426 } 1427 1428 /** 1429 * Returns the Lucene search field configurations only.<p> 1430 * 1431 * @return the Lucene search field configurations 1432 */ 1433 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1434 1435 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1436 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1437 if (conf instanceof CmsLuceneFieldConfiguration) { 1438 result.add((CmsLuceneFieldConfiguration)conf); 1439 } 1440 } 1441 Collections.sort(result); 1442 return Collections.unmodifiableList(result); 1443 } 1444 1445 /** 1446 * Returns the Solr search field configurations only.<p> 1447 * 1448 * @return the Solr search field configurations 1449 */ 1450 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1451 1452 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1453 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1454 if (conf instanceof CmsSolrFieldConfiguration) { 1455 result.add((CmsSolrFieldConfiguration)conf); 1456 } 1457 } 1458 Collections.sort(result); 1459 return Collections.unmodifiableList(result); 1460 } 1461 1462 /** 1463 * Returns the force unlock mode during indexing.<p> 1464 * 1465 * @return the force unlock mode during indexing 1466 */ 1467 public CmsSearchForceUnlockMode getForceunlock() { 1468 1469 return m_forceUnlockMode; 1470 } 1471 1472 /** 1473 * Returns the highlighter.<p> 1474 * 1475 * @return the highlighter 1476 */ 1477 public I_CmsTermHighlighter getHighlighter() { 1478 1479 return m_highlighter; 1480 } 1481 1482 /** 1483 * Returns the Lucene search index configured with the given name.<p> 1484 * The index must exist, otherwise <code>null</code> is returned. 1485 * 1486 * @param indexName then name of the requested search index 1487 * 1488 * @return the Lucene search index configured with the given name 1489 */ 1490 public I_CmsSearchIndex getIndex(String indexName) { 1491 1492 for (I_CmsSearchIndex index : m_indexes) { 1493 if (indexName.equalsIgnoreCase(index.getName())) { 1494 return index; 1495 } 1496 } 1497 return null; 1498 } 1499 1500 /** 1501 * Returns the seconds to wait for an index lock during an update operation.<p> 1502 * 1503 * @return the seconds to wait for an index lock during an update operation 1504 */ 1505 public int getIndexLockMaxWaitSeconds() { 1506 1507 return m_indexLockMaxWaitSeconds; 1508 } 1509 1510 /** 1511 * Returns the names of all configured indexes.<p> 1512 * 1513 * @return list of names 1514 */ 1515 public List<String> getIndexNames() { 1516 1517 List<String> indexNames = new ArrayList<String>(); 1518 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1519 indexNames.add((m_indexes.get(i)).getName()); 1520 } 1521 1522 return indexNames; 1523 } 1524 1525 /** 1526 * Returns the Solr index configured with the given name.<p> 1527 * The index must exist, otherwise <code>null</code> is returned. 1528 * 1529 * @param indexName then name of the requested Solr index 1530 * @return the Solr index configured with the given name 1531 */ 1532 public CmsSolrIndex getIndexSolr(String indexName) { 1533 1534 I_CmsSearchIndex index = getIndex(indexName); 1535 if (index instanceof CmsSolrIndex) { 1536 return (CmsSolrIndex)index; 1537 } 1538 return null; 1539 } 1540 1541 /** 1542 * Returns a search index source for a specified source name.<p> 1543 * 1544 * @param sourceName the name of the index source 1545 * @return a search index source 1546 */ 1547 public CmsSearchIndexSource getIndexSource(String sourceName) { 1548 1549 return m_indexSources.get(sourceName); 1550 } 1551 1552 /** 1553 * Returns the max. excerpt length.<p> 1554 * 1555 * @return the max excerpt length 1556 */ 1557 public int getMaxExcerptLength() { 1558 1559 return m_maxExcerptLength; 1560 } 1561 1562 /** 1563 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1564 * 1565 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1566 */ 1567 public long getMaxIndexWaitTime() { 1568 1569 return m_maxIndexWaitTime; 1570 } 1571 1572 /** 1573 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1574 * 1575 * @return the maximum number of modifications before a commit in the search index is triggered 1576 */ 1577 public int getMaxModificationsBeforeCommit() { 1578 1579 return m_maxModificationsBeforeCommit; 1580 } 1581 1582 /** 1583 * Returns the update frequency of the offline indexer in milliseconds.<p> 1584 * 1585 * @return the update frequency of the offline indexer in milliseconds 1586 */ 1587 public long getOfflineUpdateFrequency() { 1588 1589 return m_offlineUpdateFrequency; 1590 } 1591 1592 /** 1593 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1594 * 1595 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1596 */ 1597 public List<I_CmsSearchIndex> getSearchIndexes() { 1598 1599 return Collections.unmodifiableList(m_indexes); 1600 } 1601 1602 /** 1603 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1604 * 1605 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1606 */ 1607 public List<I_CmsSearchIndex> getSearchIndexesAll() { 1608 1609 return Collections.unmodifiableList(m_indexes); 1610 } 1611 1612 /** 1613 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1614 * 1615 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1616 */ 1617 public List<CmsSolrIndex> getSearchIndexesSolr() { 1618 1619 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1620 for (I_CmsSearchIndex index : m_indexes) { 1621 if (index instanceof CmsSolrIndex) { 1622 indexes.add((CmsSolrIndex)index); 1623 } 1624 } 1625 return Collections.unmodifiableList(indexes); 1626 } 1627 1628 /** 1629 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1630 * 1631 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1632 */ 1633 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1634 1635 return Collections.unmodifiableMap(m_indexSources); 1636 } 1637 1638 /** 1639 * Return singleton instance of the OpenCms spellchecker.<p> 1640 * 1641 * @return instance of CmsSolrSpellchecker. 1642 */ 1643 public CmsSolrSpellchecker getSolrDictionary() { 1644 1645 // get the core container that contains one core for each configured index 1646 if (m_coreContainer == null) { 1647 m_coreContainer = createCoreContainer(); 1648 } 1649 return CmsSolrSpellchecker.getInstance(m_coreContainer); 1650 } 1651 1652 /** 1653 * Returns the Solr configuration.<p> 1654 * 1655 * @return the Solr configuration 1656 */ 1657 public CmsSolrConfiguration getSolrServerConfiguration() { 1658 1659 return m_solrConfig; 1660 } 1661 1662 /** 1663 * Returns the timeout to abandon threads indexing a resource.<p> 1664 * 1665 * @return the timeout to abandon threads indexing a resource 1666 */ 1667 public long getTimeout() { 1668 1669 return m_timeout; 1670 } 1671 1672 /** 1673 * Initializes the search manager.<p> 1674 * 1675 * @param cms the cms object 1676 * 1677 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1678 */ 1679 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1680 1681 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1682 try { 1683 // store the Admin cms to index Cms resources 1684 m_adminCms = OpenCms.initCmsObject(cms); 1685 } catch (CmsException e) { 1686 // this should never happen 1687 LOG.error(e.getLocalizedMessage(), e); 1688 } 1689 // make sure the site root is the root site 1690 m_adminCms.getRequestContext().setSiteRoot("/"); 1691 1692 // create the extraction result cache 1693 m_extractionResultCache = new CmsExtractionResultCache( 1694 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1695 "/extractCache"); 1696 initializeFieldConfigurations(); 1697 initializeIndexes(); 1698 initOfflineIndexes(); 1699 1700 // register this object as event listener 1701 OpenCms.addCmsEventListener( 1702 this, 1703 new int[] { 1704 I_CmsEventListener.EVENT_CLEAR_CACHES, 1705 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 1706 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES, 1707 I_CmsEventListener.EVENT_REINDEX_OFFLINE, 1708 I_CmsEventListener.EVENT_REINDEX_ONLINE}); 1709 } 1710 1711 /** 1712 * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations. 1713 */ 1714 public void initializeFieldConfigurations() { 1715 1716 for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) { 1717 config.init(); 1718 } 1719 1720 } 1721 1722 /** 1723 * Initializes all configured document types, index sources and search indexes.<p> 1724 * 1725 * This methods needs to be called if after a change in the index configuration has been made. 1726 */ 1727 public void initializeIndexes() { 1728 1729 initAvailableDocumentTypes(); 1730 initIndexSources(); 1731 initSearchIndexes(); 1732 } 1733 1734 /** 1735 * Initialize the offline index handler, require after an offline index has been added.<p> 1736 */ 1737 public void initOfflineIndexes() { 1738 1739 // check which indexes are configured as offline indexes 1740 List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>(); 1741 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 1742 while (i.hasNext()) { 1743 I_CmsSearchIndex index = i.next(); 1744 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1745 // this is an offline index 1746 offlineIndexes.add(index); 1747 } 1748 } 1749 m_offlineIndexes = offlineIndexes; 1750 m_offlineHandler.initialize(); 1751 1752 } 1753 1754 /** 1755 * Initializes the spell check index.<p> 1756 * 1757 * @param adminCms the ROOT_ADMIN cms context 1758 */ 1759 public void initSpellcheckIndex(CmsObject adminCms) { 1760 1761 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) { 1762 final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary(); 1763 if (spellchecker != null) { 1764 1765 Runnable initRunner = new Runnable() { 1766 1767 public void run() { 1768 1769 try { 1770 spellchecker.parseAndAddDictionaries(adminCms); 1771 } catch (CmsRoleViolationException e) { 1772 LOG.error(e.getLocalizedMessage(), e); 1773 } 1774 } 1775 }; 1776 new Thread(initRunner).start(); 1777 } 1778 } 1779 } 1780 1781 /** 1782 * Returns if the offline indexing is paused.<p> 1783 * 1784 * @return <code>true</code> if the offline indexing is paused 1785 */ 1786 public boolean isOfflineIndexingPaused() { 1787 1788 return m_offlineUpdateFrequency == Long.MAX_VALUE; 1789 } 1790 1791 /** 1792 * Updates the indexes from as a scheduled job.<p> 1793 * 1794 * @param cms the OpenCms user context to use when reading resources from the VFS 1795 * @param parameters the parameters for the scheduled job 1796 * 1797 * @throws Exception if something goes wrong 1798 * 1799 * @return the String to write in the scheduler log 1800 * 1801 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 1802 */ 1803 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 1804 1805 CmsSearchManager manager = OpenCms.getSearchManager(); 1806 1807 I_CmsReport report = null; 1808 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 1809 1810 if (writeLog) { 1811 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 1812 } 1813 1814 List<String> updateList = null; 1815 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 1816 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 1817 // index list has been provided as job parameter 1818 updateList = new ArrayList<String>(); 1819 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 1820 for (int i = 0; i < indexNames.length; i++) { 1821 // check if the index actually exists 1822 if (manager.getIndex(indexNames[i]) != null) { 1823 updateList.add(indexNames[i]); 1824 } else { 1825 if (LOG.isWarnEnabled()) { 1826 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 1827 } 1828 } 1829 } 1830 } 1831 1832 long startTime = System.currentTimeMillis(); 1833 1834 if (updateList == null) { 1835 // all indexes need to be updated 1836 manager.rebuildAllIndexes(report); 1837 } else { 1838 // rebuild only the selected indexes 1839 manager.rebuildIndexes(updateList, report); 1840 } 1841 1842 long runTime = System.currentTimeMillis() - startTime; 1843 1844 String finishMessage = Messages.get().getBundle().key( 1845 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 1846 CmsStringUtil.formatRuntime(runTime)); 1847 1848 if (LOG.isInfoEnabled()) { 1849 LOG.info(finishMessage); 1850 } 1851 return finishMessage; 1852 } 1853 1854 /** 1855 * Pauses the offline indexing.<p> 1856 * May take some time, because the indexes are updated first.<p> 1857 */ 1858 public void pauseOfflineIndexing() { 1859 1860 if (m_offlineUpdateFrequency != Long.MAX_VALUE) { 1861 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 1862 m_offlineUpdateFrequency = Long.MAX_VALUE; 1863 updateOfflineIndexes(0); 1864 } 1865 } 1866 1867 /** 1868 * Rebuilds (if required creates) all configured indexes.<p> 1869 * 1870 * @param report the report object to write messages (or <code>null</code>) 1871 * 1872 * @throws CmsException if something goes wrong 1873 */ 1874 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 1875 1876 try { 1877 SEARCH_MANAGER_LOCK.lock(); 1878 1879 CmsMessageContainer container = null; 1880 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1881 // iterate all configured search indexes 1882 I_CmsSearchIndex searchIndex = m_indexes.get(i); 1883 try { 1884 // update the index 1885 updateIndex(searchIndex, report, null); 1886 } catch (CmsException e) { 1887 container = new CmsMessageContainer( 1888 Messages.get(), 1889 Messages.ERR_INDEX_REBUILD_ALL_1, 1890 new Object[] {searchIndex.getName()}); 1891 LOG.error( 1892 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 1893 e); 1894 } 1895 } 1896 // clean up the extraction result cache 1897 cleanExtractionCache(); 1898 if (container != null) { 1899 // throw stored exception 1900 throw new CmsSearchException(container); 1901 } 1902 } finally { 1903 SEARCH_MANAGER_LOCK.unlock(); 1904 } 1905 } 1906 1907 /** 1908 * Rebuilds (if required creates) the index with the given name.<p> 1909 * 1910 * @param indexName the name of the index to rebuild 1911 * @param report the report object to write messages (or <code>null</code>) 1912 * 1913 * @throws CmsException if something goes wrong 1914 */ 1915 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 1916 1917 try { 1918 SEARCH_MANAGER_LOCK.lock(); 1919 // get the search index by name 1920 I_CmsSearchIndex index = getIndex(indexName); 1921 // update the index 1922 updateIndex(index, report, null); 1923 // clean up the extraction result cache 1924 cleanExtractionCache(); 1925 } finally { 1926 SEARCH_MANAGER_LOCK.unlock(); 1927 } 1928 } 1929 1930 /** 1931 * Rebuilds (if required creates) the List of indexes with the given name.<p> 1932 * 1933 * @param indexNames the names (String) of the index to rebuild 1934 * @param report the report object to write messages (or <code>null</code>) 1935 * 1936 * @throws CmsException if something goes wrong 1937 */ 1938 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 1939 1940 try { 1941 SEARCH_MANAGER_LOCK.lock(); 1942 Iterator<String> i = indexNames.iterator(); 1943 while (i.hasNext()) { 1944 String indexName = i.next(); 1945 // get the search index by name 1946 I_CmsSearchIndex index = getIndex(indexName); 1947 if (index != null) { 1948 // update the index 1949 updateIndex(index, report, null); 1950 } else { 1951 if (LOG.isWarnEnabled()) { 1952 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1953 } 1954 } 1955 } 1956 // clean up the extraction result cache 1957 cleanExtractionCache(); 1958 } finally { 1959 SEARCH_MANAGER_LOCK.unlock(); 1960 } 1961 } 1962 1963 /** 1964 * Registers a new Solr core for the given index.<p> 1965 * 1966 * @param index the index to register a new Solr core for 1967 * 1968 * @throws CmsConfigurationException if no Solr server is configured 1969 */ 1970 @SuppressWarnings("resource") 1971 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 1972 1973 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 1974 // No solr server configured 1975 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 1976 } 1977 1978 if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present. 1979 index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build()); 1980 } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present. 1981 // HTTP Server configured 1982 // TODO Implement multi core support for HTTP server 1983 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 1984 index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build()); 1985 } else { // Default to the embedded Solr Server 1986 1987 // get the core container that contains one core for each configured index 1988 if (m_coreContainer == null) { 1989 m_coreContainer = createCoreContainer(); 1990 } 1991 1992 // unload the existing core if it exists to avoid problems with forced unlock. 1993 if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) { 1994 m_coreContainer.unload(index.getCoreName(), false, false, true); 1995 } 1996 // ensure that all locks on the index are gone 1997 ensureIndexIsUnlocked(index.getPath()); 1998 1999 // load the core to the container 2000 File dataDir = new File(index.getPath()); 2001 if (!dataDir.exists()) { 2002 dataDir.mkdirs(); 2003 if (CmsLog.INIT.isInfoEnabled()) { 2004 CmsLog.INIT.info( 2005 Messages.get().getBundle().key( 2006 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2007 index.getName(), 2008 index.getPath())); 2009 } 2010 } 2011 File instanceDir = new File( 2012 m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 2013 if (!instanceDir.exists()) { 2014 instanceDir.mkdirs(); 2015 if (CmsLog.INIT.isInfoEnabled()) { 2016 CmsLog.INIT.info( 2017 Messages.get().getBundle().key( 2018 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2019 index.getName(), 2020 index.getPath())); 2021 } 2022 } 2023 2024 // create the core 2025 // TODO: suboptimal - forces always the same schema 2026 SolrCore core = null; 2027 try { 2028 // creation includes registration. 2029 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 2030 Map<String, String> properties = new HashMap<String, String>(3); 2031 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 2032 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 2033 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false); 2034 } catch (NullPointerException e) { 2035 if (core != null) { 2036 core.close(); 2037 } 2038 throw new CmsConfigurationException( 2039 Messages.get().container( 2040 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 2041 index.getName() + " (" + index.getCoreName() + ")", 2042 index.getPath(), 2043 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 2044 e); 2045 } 2046 2047 if (index.isNoSolrServerSet()) { 2048 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 2049 } 2050 if (CmsLog.INIT.isInfoEnabled()) { 2051 CmsLog.INIT.info( 2052 Messages.get().getBundle().key( 2053 Messages.INIT_SOLR_SERVER_CREATED_1, 2054 index.getName() + " (" + index.getCoreName() + ")")); 2055 } 2056 } 2057 } 2058 2059 /** 2060 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 2061 * 2062 * @param fieldConfiguration the field configuration to remove from the configuration 2063 * 2064 * @return true if remove was successful, false if preconditions for removal are ok but the given 2065 * field configuration was unknown to the manager. 2066 * 2067 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 2068 * <code>{@link I_CmsSearchIndex}</code>. 2069 * 2070 */ 2071 public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) 2072 throws CmsIllegalStateException { 2073 2074 // never remove the standard field configuration 2075 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 2076 throw new CmsIllegalStateException( 2077 Messages.get().container( 2078 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 2079 fieldConfiguration.getName())); 2080 } 2081 // validation if removal will be granted 2082 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2083 I_CmsSearchIndex idx; 2084 // the list for collecting indexes that use the given field configuration 2085 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2086 I_CmsSearchFieldConfiguration refFieldConfig; 2087 while (itIndexes.hasNext()) { 2088 idx = itIndexes.next(); 2089 refFieldConfig = idx.getFieldConfiguration(); 2090 if (refFieldConfig.equals(fieldConfiguration)) { 2091 referrers.add(idx); 2092 } 2093 } 2094 if (referrers.size() > 0) { 2095 throw new CmsIllegalStateException( 2096 Messages.get().container( 2097 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 2098 fieldConfiguration.getName(), 2099 referrers.toString())); 2100 } 2101 2102 // remove operation (no exception) 2103 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 2104 2105 } 2106 2107 /** 2108 * Removes a search field from the field configuration.<p> 2109 * 2110 * @param fieldConfiguration the field configuration 2111 * @param field field to remove from the field configuration 2112 * 2113 * @return true if remove was successful, false if preconditions for removal are ok but the given 2114 * field was unknown. 2115 */ 2116 public boolean removeSearchFieldConfigurationField( 2117 I_CmsSearchFieldConfiguration fieldConfiguration, 2118 CmsSearchField field) { 2119 2120 if (LOG.isInfoEnabled()) { 2121 LOG.info( 2122 Messages.get().getBundle().key( 2123 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 2124 field.getName(), 2125 fieldConfiguration.getName())); 2126 } 2127 2128 return fieldConfiguration.getFields().remove(field); 2129 } 2130 2131 /** 2132 * Removes a search field mapping from the given field.<p> 2133 * 2134 * @param field the field 2135 * @param mapping mapping to remove from the field 2136 * 2137 * @return true if remove was successful, false if preconditions for removal are ok but the given 2138 * mapping was unknown. 2139 * 2140 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 2141 */ 2142 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 2143 throws CmsIllegalStateException { 2144 2145 if (field.getMappings().size() < 2) { 2146 throw new CmsIllegalStateException( 2147 Messages.get().container( 2148 Messages.ERR_FIELD_MAPPING_DELETE_2, 2149 mapping.getType().toString(), 2150 field.getName())); 2151 } else { 2152 2153 if (LOG.isInfoEnabled()) { 2154 LOG.info( 2155 Messages.get().getBundle().key( 2156 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 2157 mapping.toString(), 2158 field.getName())); 2159 } 2160 return field.getMappings().remove(mapping); 2161 } 2162 } 2163 2164 /** 2165 * Removes a search index from the configuration.<p> 2166 * 2167 * @param searchIndex the search index to remove 2168 */ 2169 public void removeSearchIndex(I_CmsSearchIndex searchIndex) { 2170 2171 // shut down index to remove potential config files of Solr indexes 2172 searchIndex.shutDown(); 2173 if (searchIndex instanceof CmsSolrIndex) { 2174 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 2175 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 2176 } 2177 m_indexes.remove(searchIndex); 2178 initOfflineIndexes(); 2179 2180 if (LOG.isInfoEnabled()) { 2181 LOG.info( 2182 Messages.get().getBundle().key( 2183 Messages.LOG_REMOVE_SEARCH_INDEX_2, 2184 searchIndex.getName(), 2185 searchIndex.getProject())); 2186 } 2187 } 2188 2189 /** 2190 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 2191 * 2192 * @param indexNames the names of the index to remove 2193 */ 2194 public void removeSearchIndexes(List<String> indexNames) { 2195 2196 Iterator<String> i = indexNames.iterator(); 2197 while (i.hasNext()) { 2198 String indexName = i.next(); 2199 // get the search index by name 2200 I_CmsSearchIndex index = getIndex(indexName); 2201 if (index != null) { 2202 // remove the index 2203 removeSearchIndex(index); 2204 } else { 2205 if (LOG.isWarnEnabled()) { 2206 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 2207 } 2208 } 2209 } 2210 } 2211 2212 /** 2213 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 2214 * 2215 * @param indexsource the indexsource to remove from the configuration 2216 * 2217 * @return true if remove was successful, false if preconditions for removal are ok but the given 2218 * searchindex was unknown to the manager. 2219 * 2220 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 2221 * <code>{@link I_CmsSearchIndex}</code>. 2222 * 2223 */ 2224 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2225 2226 // validation if removal will be granted 2227 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2228 I_CmsSearchIndex idx; 2229 // the list for collecting indexes that use the given index source 2230 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2231 // the current list of referred index sources of the iterated index 2232 List<CmsSearchIndexSource> refsources; 2233 while (itIndexes.hasNext()) { 2234 idx = itIndexes.next(); 2235 refsources = idx.getSources(); 2236 if (refsources != null) { 2237 if (refsources.contains(indexsource)) { 2238 referrers.add(idx); 2239 } 2240 } 2241 } 2242 if (referrers.size() > 0) { 2243 throw new CmsIllegalStateException( 2244 Messages.get().container( 2245 Messages.ERR_INDEX_SOURCE_DELETE_2, 2246 indexsource.getName(), 2247 referrers.toString())); 2248 } 2249 2250 // remove operation (no exception) 2251 return m_indexSources.remove(indexsource.getName()) != null; 2252 2253 } 2254 2255 /** 2256 * Resumes offline indexing if it was paused.<p> 2257 */ 2258 public void resumeOfflineIndexing() { 2259 2260 if (m_offlineUpdateFrequency == Long.MAX_VALUE) { 2261 setOfflineUpdateFrequency( 2262 m_configuredOfflineIndexingFrequency > 0 2263 ? m_configuredOfflineIndexingFrequency 2264 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2265 } 2266 } 2267 2268 /** 2269 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2270 * 2271 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2272 */ 2273 public void setDirectory(String value) { 2274 2275 m_path = value; 2276 } 2277 2278 /** 2279 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2280 * 2281 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2282 */ 2283 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2284 2285 m_extractionCacheMaxAge = extractionCacheMaxAge; 2286 } 2287 2288 /** 2289 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2290 * 2291 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2292 */ 2293 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2294 2295 try { 2296 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2297 } catch (NumberFormatException e) { 2298 LOG.error( 2299 Messages.get().getBundle().key( 2300 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2301 extractionCacheMaxAge, 2302 new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2303 e); 2304 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2305 } 2306 } 2307 2308 /** 2309 * Sets the unlock mode during indexing.<p> 2310 * 2311 * @param value the value 2312 */ 2313 public void setForceunlock(String value) { 2314 2315 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2316 } 2317 2318 /** 2319 * Sets the highlighter.<p> 2320 * 2321 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2322 * 2323 * @param highlighter the package/class name of the highlighter 2324 */ 2325 public void setHighlighter(String highlighter) { 2326 2327 try { 2328 m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2329 } catch (Exception e) { 2330 m_highlighter = null; 2331 LOG.error(e.getLocalizedMessage(), e); 2332 } 2333 } 2334 2335 /** 2336 * Sets the seconds to wait for an index lock during an update operation.<p> 2337 * 2338 * @param value the seconds to wait for an index lock during an update operation 2339 */ 2340 public void setIndexLockMaxWaitSeconds(int value) { 2341 2342 m_indexLockMaxWaitSeconds = value; 2343 } 2344 2345 /** 2346 * Sets the max. excerpt length.<p> 2347 * 2348 * @param maxExcerptLength the max. excerpt length to set 2349 */ 2350 public void setMaxExcerptLength(int maxExcerptLength) { 2351 2352 m_maxExcerptLength = maxExcerptLength; 2353 } 2354 2355 /** 2356 * Sets the max. excerpt length as a String.<p> 2357 * 2358 * @param maxExcerptLength the max. excerpt length to set 2359 */ 2360 public void setMaxExcerptLength(String maxExcerptLength) { 2361 2362 try { 2363 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2364 } catch (Exception e) { 2365 LOG.error( 2366 Messages.get().getBundle().key( 2367 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2368 maxExcerptLength, 2369 new Integer(DEFAULT_EXCERPT_LENGTH)), 2370 e); 2371 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2372 } 2373 } 2374 2375 /** 2376 * Sets the maximal wait time for offline index updates after edit operations.<p> 2377 * 2378 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2379 */ 2380 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2381 2382 m_maxIndexWaitTime = maxIndexWaitTime; 2383 } 2384 2385 /** 2386 * Sets the maximal wait time for offline index updates after edit operations.<p> 2387 * 2388 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2389 */ 2390 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2391 2392 try { 2393 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2394 } catch (Exception e) { 2395 LOG.error( 2396 Messages.get().getBundle().key( 2397 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2398 maxIndexWaitTime, 2399 new Long(DEFAULT_MAX_INDEX_WAITTIME)), 2400 e); 2401 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2402 } 2403 } 2404 2405 /** 2406 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2407 * 2408 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2409 */ 2410 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2411 2412 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2413 } 2414 2415 /** 2416 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2417 * 2418 * @param value the maximum number of modifications to set 2419 */ 2420 public void setMaxModificationsBeforeCommit(String value) { 2421 2422 try { 2423 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2424 } catch (Exception e) { 2425 LOG.error( 2426 Messages.get().getBundle().key( 2427 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2428 value, 2429 new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2430 e); 2431 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2432 } 2433 } 2434 2435 /** 2436 * Sets the update frequency of the offline indexer in milliseconds.<p> 2437 * 2438 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2439 */ 2440 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2441 2442 m_offlineUpdateFrequency = offlineUpdateFrequency; 2443 updateOfflineIndexes(0); 2444 } 2445 2446 /** 2447 * Sets the update frequency of the offline indexer in milliseconds.<p> 2448 * 2449 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2450 */ 2451 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2452 2453 try { 2454 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2455 } catch (Exception e) { 2456 LOG.error( 2457 Messages.get().getBundle().key( 2458 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2459 offlineUpdateFrequency, 2460 new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2461 e); 2462 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2463 } 2464 } 2465 2466 /** 2467 * Sets the Solr configuration.<p> 2468 * 2469 * @param config the Solr configuration 2470 */ 2471 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2472 2473 m_solrConfig = config; 2474 } 2475 2476 /** 2477 * Sets the timeout to abandon threads indexing a resource.<p> 2478 * 2479 * @param value the timeout in milliseconds 2480 */ 2481 public void setTimeout(long value) { 2482 2483 m_timeout = value; 2484 } 2485 2486 /** 2487 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2488 * 2489 * @param value the timeout in milliseconds 2490 */ 2491 public void setTimeout(String value) { 2492 2493 try { 2494 setTimeout(Long.parseLong(value)); 2495 } catch (Exception e) { 2496 LOG.error( 2497 Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)), 2498 e); 2499 setTimeout(DEFAULT_TIMEOUT); 2500 } 2501 } 2502 2503 /** 2504 * Shuts down the search manager.<p> 2505 * 2506 * This will cause all search indices to be shut down.<p> 2507 */ 2508 public void shutDown() { 2509 2510 if (m_offlineIndexThread != null) { 2511 m_offlineIndexThread.shutDown(); 2512 } 2513 2514 if (m_offlineHandler != null) { 2515 OpenCms.removeCmsEventListener(m_offlineHandler); 2516 } 2517 2518 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2519 while (i.hasNext()) { 2520 I_CmsSearchIndex index = i.next(); 2521 index.shutDown(); 2522 index = null; 2523 } 2524 m_indexes.clear(); 2525 2526 shutDownSolrContainer(); 2527 2528 if (CmsLog.INIT.isInfoEnabled()) { 2529 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2530 } 2531 } 2532 2533 /** 2534 * Updates all offline indexes.<p> 2535 * 2536 * Can be used to force an index update when it's not convenient to wait until the 2537 * offline update interval has eclipsed.<p> 2538 * 2539 * Since the offline indexes still need some time to update the new resources, 2540 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2541 * to ensure that updating is finished. 2542 * 2543 * @see #updateOfflineIndexes(long) 2544 * 2545 */ 2546 public void updateOfflineIndexes() { 2547 2548 updateOfflineIndexes(getMaxIndexWaitTime()); 2549 } 2550 2551 /** 2552 * Updates all offline indexes.<p> 2553 * 2554 * Can be used to force an index update when it's not convenient to wait until the 2555 * offline update interval has eclipsed.<p> 2556 * 2557 * Since the offline index will still need some time to update the new resources even if it runs directly, 2558 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2559 * 2560 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2561 */ 2562 public void updateOfflineIndexes(long waitTime) { 2563 2564 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2565 // notify existing thread of update frequency change 2566 if (LOG.isDebugEnabled()) { 2567 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2568 } 2569 m_offlineIndexThread.interrupt(); 2570 if (waitTime > 0) { 2571 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2572 } 2573 } 2574 } 2575 2576 /** 2577 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2578 * We take transitive dependencies into account and handle cyclic dependencies correctly as well. 2579 * 2580 * @param adminCms an OpenCms user context with Admin permissions 2581 * @param updateResources the resources to be re-indexed 2582 * 2583 * @return the updated list of resource to re-index 2584 */ 2585 protected List<CmsPublishedResource> addAdditionallyAffectedResources( 2586 CmsObject adminCms, 2587 List<CmsPublishedResource> updateResources) { 2588 2589 Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources); 2590 Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet; 2591 Collection<CmsPublishedResource> additionalResources = Collections.emptySet(); 2592 do { 2593 additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck); 2594 additionalResources.addAll(addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck)); 2595 updateResources.addAll(additionalResources); 2596 updateResourceSet.addAll(additionalResources); 2597 resourcesToCheck = additionalResources; 2598 } while (resourcesToCheck.size() > 0); 2599 return updateResources; 2600 } 2601 2602 /** 2603 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2604 * 2605 * @param adminCms an OpenCms user context with Admin permissions 2606 * @param updateResources the resources to be re-indexed 2607 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2608 * 2609 * @return the list of resources that need to be additionally re-index 2610 */ 2611 protected Collection<CmsPublishedResource> addIndexContentRelatedResources( 2612 CmsObject adminCms, 2613 Collection<CmsPublishedResource> updateResources, 2614 Collection<CmsPublishedResource> updateResourcesToCheck) { 2615 2616 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2617 for (CmsPublishedResource checkedRes : updateResourcesToCheck) { 2618 try { 2619 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId()); 2620 filter = filter.filterType(CmsRelationType.INDEX_CONTENT); 2621 List<CmsRelation> relations = adminCms.readRelations(filter); 2622 for (CmsRelation relation : relations) { 2623 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2624 CmsPublishedResource additionalPubRes = new CmsPublishedResource(res); 2625 if (!updateResources.contains(additionalPubRes)) { 2626 additionalUpdateResources.add(additionalPubRes); 2627 } 2628 } 2629 } catch (CmsException e) { 2630 LOG.error(e.getLocalizedMessage(), e); 2631 } 2632 } 2633 return additionalUpdateResources; 2634 } 2635 2636 /** 2637 * Cleans up the extraction result cache.<p> 2638 */ 2639 protected void cleanExtractionCache() { 2640 2641 // clean up the extraction result cache 2642 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2643 } 2644 2645 /** 2646 * Collects the related containerpages to the resources that have been published.<p> 2647 * 2648 * @param adminCms an OpenCms user context with Admin permissions 2649 * @param updateResources the resources to be re-indexed 2650 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2651 * 2652 * @return the list of resources that need to be additionally re-index 2653 */ 2654 protected Collection<CmsPublishedResource> findRelatedContainerPages( 2655 CmsObject adminCms, 2656 Collection<CmsPublishedResource> updateResources, 2657 Collection<CmsPublishedResource> updateResourcesToCheck) { 2658 2659 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2660 2661 Set<CmsResource> elementGroups = new HashSet<CmsResource>(); 2662 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2663 int containerPageTypeId = -1; 2664 try { 2665 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2666 } catch (CmsLoaderException e) { 2667 // will happen during setup, when container page type is not available yet 2668 LOG.info(e.getLocalizedMessage(), e); 2669 } 2670 if (containerPageTypeId != -1) { 2671 for (CmsPublishedResource pubRes : updateResourcesToCheck) { 2672 try { 2673 if (OpenCms.getResourceManager().getResourceType( 2674 pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2675 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2676 filter.filterStrong(); 2677 List<CmsRelation> relations = adminCms.readRelations(filter); 2678 for (CmsRelation relation : relations) { 2679 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2680 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2681 containerPages.add(res); 2682 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2683 adminCms, 2684 adminCms.getSitePath(res))) { 2685 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2686 } 2687 } else 2688 if (OpenCms.getResourceManager().getResourceType(res.getTypeId()).getTypeName().equals( 2689 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) { 2690 elementGroups.add(res); 2691 } 2692 } 2693 } 2694 if (containerPageTypeId == pubRes.getType()) { 2695 addDetailContent( 2696 adminCms, 2697 containerPages, 2698 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2699 } 2700 } catch (CmsException e) { 2701 LOG.error(e.getLocalizedMessage(), e); 2702 } 2703 } 2704 for (CmsResource pubRes : elementGroups) { 2705 try { 2706 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2707 filter.filterStrong(); 2708 List<CmsRelation> relations = adminCms.readRelations(filter); 2709 for (CmsRelation relation : relations) { 2710 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2711 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2712 containerPages.add(res); 2713 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2714 adminCms, 2715 adminCms.getSitePath(res))) { 2716 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2717 } 2718 } 2719 } 2720 } catch (CmsException e) { 2721 LOG.error(e.getLocalizedMessage(), e); 2722 } 2723 } 2724 // add all found container pages as published resource objects to the list 2725 for (CmsResource page : containerPages) { 2726 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2727 if (!updateResources.contains(pubCont)) { 2728 // ensure container page is added only once 2729 additionalUpdateResources.add(pubCont); 2730 } 2731 } 2732 } 2733 return additionalUpdateResources; 2734 } 2735 2736 /** 2737 * Returns the set of names of all configured document types.<p> 2738 * 2739 * @return the set of names of all configured document types 2740 */ 2741 protected List<String> getDocumentTypes() { 2742 2743 return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet())); 2744 } 2745 2746 /** 2747 * Returns the a offline project used for offline indexing.<p> 2748 * 2749 * @return the offline project if available 2750 */ 2751 protected CmsProject getOfflineIndexProject() { 2752 2753 CmsProject result = null; 2754 for (I_CmsSearchIndex index : m_offlineIndexes) { 2755 try { 2756 result = m_adminCms.readProject(index.getProject()); 2757 2758 if (!result.isOnlineProject()) { 2759 break; 2760 } 2761 } catch (Exception e) { 2762 // may be a missconfigured index, ignore 2763 LOG.error(e.getLocalizedMessage(), e); 2764 } 2765 } 2766 return result; 2767 } 2768 2769 /** 2770 * Returns a new thread manager for the indexing threads.<p> 2771 * 2772 * @return a new thread manager for the indexing threads 2773 */ 2774 protected CmsIndexingThreadManager getThreadManager() { 2775 2776 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 2777 } 2778 2779 /** 2780 * Initializes the available Cms resource types to be indexed.<p> 2781 * 2782 * A map stores document factories keyed by a string representing 2783 * a colon separated list of Cms resource types and/or mimetypes.<p> 2784 * 2785 * The keys of this map are used to trigger a document factory to convert 2786 * a Cms resource into a Lucene index document.<p> 2787 * 2788 * A document factory is a class implementing the interface 2789 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 2790 */ 2791 protected void initAvailableDocumentTypes() { 2792 2793 CmsSearchDocumentType documenttype = null; 2794 String className = null; 2795 String name = null; 2796 I_CmsDocumentFactory documentFactory = null; 2797 List<String> resourceTypes = null; 2798 List<String> mimeTypes = null; 2799 Class<?> c = null; 2800 2801 m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>(); 2802 2803 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 2804 2805 documenttype = m_documentTypeConfigs.get(i); 2806 name = documenttype.getName(); 2807 2808 try { 2809 className = documenttype.getClassName(); 2810 resourceTypes = documenttype.getResourceTypes(); 2811 mimeTypes = documenttype.getMimeTypes(); 2812 2813 if (name == null) { 2814 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 2815 } 2816 if (className == null) { 2817 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 2818 } 2819 if (resourceTypes.size() == 0) { 2820 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 2821 } 2822 2823 try { 2824 c = Class.forName(className); 2825 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 2826 new Object[] {name}); 2827 } catch (ClassNotFoundException exc) { 2828 throw new CmsIndexException( 2829 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 2830 exc); 2831 } catch (Exception exc) { 2832 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 2833 } 2834 2835 if (documentFactory.isUsingCache()) { 2836 // init cache if used by the factory 2837 documentFactory.setCache(m_extractionResultCache); 2838 } 2839 2840 Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>(); 2841 for (Iterator<String> keyIt = documentFactory.getDocumentKeys( 2842 resourceTypes, 2843 mimeTypes).iterator(); keyIt.hasNext();) { 2844 String key = keyIt.next(); 2845 matchingTypes.put(key, documentFactory); 2846 m_extractionKeys.add(key); 2847 } 2848 m_documentTypes.put(name, matchingTypes); 2849 2850 } catch (CmsException e) { 2851 if (LOG.isWarnEnabled()) { 2852 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 2853 } 2854 } 2855 } 2856 } 2857 2858 /** 2859 * Initializes the index sources. 2860 */ 2861 protected void initIndexSources() { 2862 2863 for (CmsSearchIndexSource source : m_indexSources.values()) { 2864 source.init(); 2865 } 2866 } 2867 2868 /** 2869 * Initializes the configured search indexes.<p> 2870 * 2871 * This initializes also the list of Cms resources types 2872 * to be indexed by an index source.<p> 2873 */ 2874 protected void initSearchIndexes() { 2875 2876 I_CmsSearchIndex index = null; 2877 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2878 index = m_indexes.get(i); 2879 // reset disabled flag 2880 index.setEnabled(true); 2881 // check if the index has been configured correctly 2882 if (index.checkConfiguration(m_adminCms)) { 2883 // the index is configured correctly 2884 try { 2885 index.initialize(); 2886 } catch (Exception e) { 2887 if (CmsLog.INIT.isWarnEnabled()) { 2888 // in this case the index will be disabled 2889 CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 2890 } 2891 } 2892 } 2893 // output a log message if the index was successfully configured or not 2894 if (CmsLog.INIT.isInfoEnabled()) { 2895 if (index.isEnabled()) { 2896 CmsLog.INIT.info( 2897 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 2898 } else { 2899 CmsLog.INIT.warn( 2900 Messages.get().getBundle().key( 2901 Messages.INIT_INDEX_NOT_CONFIGURED_2, 2902 index, 2903 index.getProject())); 2904 } 2905 } 2906 } 2907 } 2908 2909 /** 2910 * Checks, if the index should be rebuilt/updated at all by the search manager. 2911 * @param index the index to check. 2912 * @return a flag, indicating if the index should be rebuilt/updated at all. 2913 */ 2914 protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) { 2915 2916 if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) { 2917 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName())); 2918 return false; 2919 } else { 2920 return true; 2921 } 2922 2923 } 2924 2925 /** 2926 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 2927 * after resources have been published.<p> 2928 * 2929 * @param adminCms an OpenCms user context with Admin permissions 2930 * @param publishHistoryId the history ID of the published project 2931 * @param report the report to write the output to 2932 */ 2933 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 2934 2935 int oldPriority = Thread.currentThread().getPriority(); 2936 try { 2937 SEARCH_MANAGER_LOCK.lock(); 2938 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 2939 List<CmsPublishedResource> publishedResources; 2940 try { 2941 // read the list of all published resources 2942 publishedResources = adminCms.readPublishedResources(publishHistoryId); 2943 } catch (CmsException e) { 2944 LOG.error( 2945 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 2946 e); 2947 return; 2948 } 2949 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 2950 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 2951 2952 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 2953 for (CmsPublishedResource res : publishedResources) { 2954 if (res.isFolder() || res.getState().isUnchanged()) { 2955 // folders and unchanged resources don't need to be indexed after publish 2956 continue; 2957 } 2958 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 2959 if (updateResources.contains(res)) { 2960 // resource may have been added as a sibling of another resource 2961 // in this case we make sure to use the value from the publish list because of the "deleted" flag 2962 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 2963 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 2964 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 2965 // check it this is a moved resource with source / target info, in this case we need both entries 2966 if (!hasMoved) { 2967 // if the resource was moved, we must contain both entries 2968 updateResources.remove(res); 2969 } 2970 // "equals()" implementation of published resource checks for id, 2971 // so the removed value may have a different "deleted" or "modified" status value 2972 updateResources.add(res); 2973 } else { 2974 // resource not yet contained in the list 2975 updateResources.add(res); 2976 // check for the siblings (not for deleted resources, these are already gone) 2977 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 2978 // this resource has siblings 2979 try { 2980 // read siblings from the online project 2981 List<CmsResource> siblings = adminCms.readSiblings( 2982 res.getRootPath(), 2983 CmsResourceFilter.ALL); 2984 Iterator<CmsResource> itSib = siblings.iterator(); 2985 while (itSib.hasNext()) { 2986 // check all siblings 2987 CmsResource sibling = itSib.next(); 2988 CmsPublishedResource sib = new CmsPublishedResource(sibling); 2989 if (!updateResources.contains(sib)) { 2990 // ensure sibling is added only once 2991 updateResources.add(sib); 2992 } 2993 } 2994 } catch (CmsException e) { 2995 // ignore, just use the original resource 2996 if (LOG.isWarnEnabled()) { 2997 LOG.warn( 2998 Messages.get().getBundle().key( 2999 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 3000 res.getRootPath()), 3001 e); 3002 } 3003 } 3004 } 3005 } 3006 } 3007 } 3008 3009 addAdditionallyAffectedResources(adminCms, updateResources); 3010 updateAllIndexes(adminCms, updateResources, report); 3011 } finally { 3012 SEARCH_MANAGER_LOCK.unlock(); 3013 Thread.currentThread().setPriority(oldPriority); 3014 } 3015 } 3016 3017 /** 3018 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p> 3019 * 3020 * @param adminCms an OpenCms user context with Admin permissions 3021 * @param updateResources the resources to update 3022 * @param report the report to write the output to 3023 */ 3024 protected void updateAllIndexes( 3025 CmsObject adminCms, 3026 List<CmsPublishedResource> updateResources, 3027 I_CmsReport report) { 3028 3029 try { 3030 SEARCH_MANAGER_LOCK.lock(); 3031 if (!updateResources.isEmpty()) { 3032 // sort the resource to update 3033 Collections.sort(updateResources); 3034 // only update the indexes if the list of remaining published resources is not empty 3035 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 3036 while (i.hasNext()) { 3037 I_CmsSearchIndex index = i.next(); 3038 if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 3039 // only update indexes which have the rebuild mode set to "auto" 3040 try { 3041 updateIndex(index, report, updateResources); 3042 } catch (CmsException e) { 3043 LOG.error( 3044 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 3045 e); 3046 } 3047 } 3048 } 3049 } 3050 // clean up the extraction result cache 3051 cleanExtractionCache(); 3052 } finally { 3053 SEARCH_MANAGER_LOCK.unlock(); 3054 } 3055 3056 } 3057 3058 /** 3059 * Updates (if required creates) the index with the given name.<p> 3060 * 3061 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 3062 * incrementally updated for these resources only. If this List is <code>null</code> or empty, 3063 * the index will be fully rebuild.<p> 3064 * 3065 * @param index the index to update or rebuild 3066 * @param report the report to write output messages to 3067 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3068 * 3069 * @throws CmsException if something goes wrong 3070 */ 3071 protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 3072 throws CmsException { 3073 3074 if (shouldUpdateAtAll(index)) { 3075 try { 3076 SEARCH_MANAGER_LOCK.lock(); 3077 3078 // copy the stored admin context for the indexing 3079 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 3080 // make sure a report is available 3081 if (report == null) { 3082 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 3083 } 3084 3085 // check if the index has been configured correctly 3086 if (!index.checkConfiguration(cms)) { 3087 // the index is disabled 3088 return; 3089 } 3090 3091 // set site root and project for this index 3092 cms.getRequestContext().setSiteRoot("/"); 3093 // switch to the index project 3094 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3095 3096 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 3097 // rebuild the complete index 3098 3099 updateIndexCompletely(cms, index, report); 3100 } else { 3101 updateIndexIncremental(cms, index, report, resourcesToIndex); 3102 } 3103 } finally { 3104 SEARCH_MANAGER_LOCK.unlock(); 3105 } 3106 } 3107 } 3108 3109 /** 3110 * The method updates all OpenCms documents that are indexed. 3111 * @param cms the OpenCms user context to use for accessing the VFS 3112 * @param index the index to update 3113 * @param report the report to write output messages to 3114 * @throws CmsIndexException thrown if indexing fails for some reason 3115 */ 3116 @SuppressWarnings("null") 3117 protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report) 3118 throws CmsIndexException { 3119 3120 // create a new thread manager for the indexing threads 3121 CmsIndexingThreadManager threadManager = getThreadManager(); 3122 3123 boolean isOfflineIndex = false; 3124 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 3125 // disable offline indexing while the complete index is rebuild 3126 isOfflineIndex = true; 3127 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL); 3128 // re-initialize the offline indexes, this will disable this offline index 3129 initOfflineIndexes(); 3130 } 3131 3132 I_CmsIndexWriter writer = null; 3133 try { 3134 // create a backup of the existing index 3135 CmsSearchIndex indexInternal = null; 3136 String backup = null; 3137 if (index instanceof CmsSearchIndex) { 3138 indexInternal = (CmsSearchIndex)index; 3139 backup = indexInternal.createIndexBackup(); 3140 if (backup != null) { 3141 indexInternal.indexSearcherOpen(backup); 3142 } 3143 } 3144 3145 // create a new index writer 3146 writer = index.getIndexWriter(report, true); 3147 if (writer instanceof I_CmsSolrIndexWriter) { 3148 try { 3149 ((I_CmsSolrIndexWriter)writer).deleteAllDocuments(); 3150 } catch (IOException e) { 3151 LOG.error(e.getMessage(), e); 3152 } 3153 } 3154 3155 // output start information on the report 3156 report.println( 3157 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), 3158 I_CmsReport.FORMAT_HEADLINE); 3159 3160 // iterate all configured index sources of this index 3161 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3162 while (sources.hasNext()) { 3163 // get the next index source 3164 CmsSearchIndexSource source = sources.next(); 3165 // create the indexer 3166 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3167 // new index creation, use all resources from the index source 3168 indexer.rebuildIndex(writer, threadManager, source); 3169 3170 // wait for indexing threads to finish 3171 while (threadManager.isRunning()) { 3172 try { 3173 Thread.sleep(500); 3174 } catch (InterruptedException e) { 3175 // just continue with the loop after interruption 3176 LOG.info(e.getLocalizedMessage(), e); 3177 } 3178 } 3179 3180 // commit and optimize the index after each index source has been finished 3181 try { 3182 writer.commit(); 3183 } catch (IOException e) { 3184 if (LOG.isWarnEnabled()) { 3185 LOG.warn( 3186 Messages.get().getBundle().key( 3187 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3188 index.getName(), 3189 index.getPath()), 3190 e); 3191 } 3192 } 3193 try { 3194 writer.optimize(); 3195 } catch (IOException e) { 3196 if (LOG.isWarnEnabled()) { 3197 LOG.warn( 3198 Messages.get().getBundle().key( 3199 Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, 3200 index.getName(), 3201 index.getPath()), 3202 e); 3203 } 3204 } 3205 } 3206 3207 // we are sure here that indexInternal is not null 3208 if (backup != null) { 3209 // remove the backup after the files have been re-indexed 3210 indexInternal.indexSearcherClose(); 3211 indexInternal.removeIndexBackup(backup); 3212 } 3213 3214 // output finish information on the report 3215 report.println( 3216 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), 3217 I_CmsReport.FORMAT_HEADLINE); 3218 3219 } finally { 3220 if (writer != null) { 3221 try { 3222 writer.close(); 3223 } catch (IOException e) { 3224 if (LOG.isWarnEnabled()) { 3225 LOG.warn( 3226 Messages.get().getBundle().key( 3227 Messages.LOG_IO_INDEX_WRITER_CLOSE_2, 3228 index.getPath(), 3229 index.getName()), 3230 e); 3231 } 3232 } 3233 } 3234 if (isOfflineIndex) { 3235 // reset the mode of the offline index 3236 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE); 3237 // re-initialize the offline indexes, this will re-enable this index 3238 initOfflineIndexes(); 3239 } 3240 // index has changed - initialize the index searcher instance 3241 index.onIndexChanged(true); 3242 } 3243 3244 // show information about indexing runtime 3245 threadManager.reportStatistics(report); 3246 } 3247 3248 /** 3249 * Incrementally updates the given index.<p> 3250 * 3251 * @param cms the OpenCms user context to use for accessing the VFS 3252 * @param index the index to update 3253 * @param report the report to write output messages to 3254 * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3255 * 3256 * @throws CmsException if something goes wrong 3257 */ 3258 protected void updateIndexIncremental( 3259 CmsObject cms, 3260 I_CmsSearchIndex index, 3261 I_CmsReport report, 3262 List<CmsPublishedResource> resourcesToIndex) 3263 throws CmsException { 3264 3265 try { 3266 SEARCH_MANAGER_LOCK.lock(); 3267 3268 // update the existing index 3269 List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>(); 3270 3271 boolean hasResourcesToDelete = false; 3272 boolean hasResourcesToUpdate = false; 3273 3274 // iterate all configured index sources of this index 3275 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3276 while (sources.hasNext()) { 3277 // get the next index source 3278 CmsSearchIndexSource source = sources.next(); 3279 // create the indexer 3280 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3281 // collect the resources to update 3282 CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); 3283 if (!updateData.isEmpty()) { 3284 // add the update collection to the internal pipeline 3285 updateCollections.add(updateData); 3286 hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 3287 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 3288 } 3289 } 3290 3291 // only start index modification if required 3292 if (hasResourcesToDelete || hasResourcesToUpdate) { 3293 // output start information on the report 3294 report.println( 3295 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 3296 I_CmsReport.FORMAT_HEADLINE); 3297 3298 I_CmsIndexWriter writer = null; 3299 try { 3300 // obtain an index writer that updates the current index 3301 writer = index.getIndexWriter(report, false); 3302 3303 if (hasResourcesToDelete) { 3304 // delete the resource from the index 3305 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3306 while (i.hasNext()) { 3307 CmsSearchIndexUpdateData updateCollection = i.next(); 3308 if (updateCollection.hasResourcesToDelete()) { 3309 updateCollection.getIndexer().deleteResources( 3310 writer, 3311 updateCollection.getResourcesToDelete()); 3312 } 3313 } 3314 } 3315 3316 if (hasResourcesToUpdate) { 3317 // create a new thread manager 3318 CmsIndexingThreadManager threadManager = getThreadManager(); 3319 3320 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3321 while (i.hasNext()) { 3322 CmsSearchIndexUpdateData updateCollection = i.next(); 3323 if (updateCollection.hasResourceToUpdate()) { 3324 updateCollection.getIndexer().updateResources( 3325 writer, 3326 threadManager, 3327 updateCollection.getResourcesToUpdate()); 3328 } 3329 } 3330 3331 // wait for indexing threads to finish 3332 while (threadManager.isRunning()) { 3333 try { 3334 Thread.sleep(500); 3335 } catch (InterruptedException e) { 3336 // just continue with the loop after interruption 3337 LOG.info(e.getLocalizedMessage(), e); 3338 } 3339 } 3340 } 3341 } finally { 3342 // close the index writer 3343 if (writer != null) { 3344 try { 3345 writer.commit(); 3346 } catch (IOException e) { 3347 LOG.error( 3348 Messages.get().getBundle().key( 3349 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3350 index.getName(), 3351 index.getPath()), 3352 e); 3353 } 3354 } 3355 // index has changed - initialize the index searcher instance 3356 index.onIndexChanged(false); 3357 } 3358 3359 // output finish information on the report 3360 report.println( 3361 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3362 I_CmsReport.FORMAT_HEADLINE); 3363 } 3364 } finally { 3365 SEARCH_MANAGER_LOCK.unlock(); 3366 } 3367 } 3368 3369 /** 3370 * Updates the offline search indexes for the given list of resources.<p> 3371 * 3372 * @param report the report to write the index information to 3373 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3374 */ 3375 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3376 3377 CmsObject cms = m_adminCms; 3378 try { 3379 // copy the administration context for the indexing 3380 cms = OpenCms.initCmsObject(m_adminCms); 3381 // set site root and project for this index 3382 cms.getRequestContext().setSiteRoot("/"); 3383 } catch (CmsException e) { 3384 LOG.error(e.getLocalizedMessage(), e); 3385 } 3386 3387 Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator(); 3388 while (j.hasNext()) { 3389 I_CmsSearchIndex index = j.next(); 3390 if (index.getSources() != null) { 3391 try { 3392 // switch to the index project 3393 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3394 updateIndexIncremental(cms, index, report, resourcesToIndex); 3395 } catch (CmsException e) { 3396 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3397 } 3398 } 3399 } 3400 } 3401 3402 /** 3403 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3404 * 3405 * @param adminCms the cms context 3406 * @param containerPages the containerpages 3407 * @param containerPage the container page site path 3408 */ 3409 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3410 3411 if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) { 3412 3413 try { 3414 CmsResource detailRes = adminCms.readResource( 3415 CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage), 3416 CmsResourceFilter.IGNORE_EXPIRATION); 3417 containerPages.add(detailRes); 3418 } catch (Throwable e) { 3419 if (LOG.isWarnEnabled()) { 3420 LOG.warn(e.getLocalizedMessage(), e); 3421 } 3422 } 3423 } 3424 } 3425 3426 /** 3427 * Creates the Solr core container.<p> 3428 * 3429 * @return the created core container 3430 */ 3431 private CoreContainer createCoreContainer() { 3432 3433 CoreContainer container = null; 3434 try { 3435 // get the core container 3436 // still no core container: create it 3437 container = CoreContainer.createAndLoad( 3438 Paths.get(m_solrConfig.getHome()), 3439 m_solrConfig.getSolrFile().toPath()); 3440 if (CmsLog.INIT.isInfoEnabled()) { 3441 CmsLog.INIT.info( 3442 Messages.get().getBundle().key( 3443 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3444 m_solrConfig.getHome(), 3445 m_solrConfig.getSolrFile().getName())); 3446 } 3447 } catch (Exception e) { 3448 LOG.error( 3449 Messages.get().getBundle().key( 3450 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3451 m_solrConfig.getSolrFile().getAbsolutePath()), 3452 e); 3453 } 3454 return container; 3455 3456 } 3457 3458 /** 3459 * Remove write.lock file in the data directory to ensure the index is unlocked. 3460 * @param dataDir the data directory of the Solr index that should be unlocked. 3461 */ 3462 private void ensureIndexIsUnlocked(String dataDir) { 3463 3464 Collection<File> lockFiles = new ArrayList<File>(2); 3465 lockFiles.add( 3466 new File( 3467 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock")); 3468 lockFiles.add( 3469 new File( 3470 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck") 3471 + "write.lock")); 3472 for (File lockFile : lockFiles) { 3473 if (lockFile.exists()) { 3474 lockFile.delete(); 3475 LOG.warn( 3476 "Forcely unlocking index with data dir \"" 3477 + dataDir 3478 + "\" by removing file \"" 3479 + lockFile.getAbsolutePath() 3480 + "\"."); 3481 } 3482 } 3483 } 3484 3485 /** 3486 * Returns the report in the given event data, if <code>null</code> 3487 * a new log report is used.<p> 3488 * 3489 * @param event the event to get the report for 3490 * 3491 * @return the report 3492 */ 3493 private I_CmsReport getEventReport(CmsEvent event) { 3494 3495 I_CmsReport report = null; 3496 if (event.getData() != null) { 3497 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3498 } 3499 if (report == null) { 3500 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3501 } 3502 return report; 3503 } 3504 3505 /** 3506 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3507 * 3508 * @param publishedResources a list of published resources 3509 * 3510 * @return the set of structure ids that satisfy the condition above 3511 */ 3512 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3513 List<CmsPublishedResource> publishedResources) { 3514 3515 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3516 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3517 for (CmsPublishedResource pubRes : publishedResources) { 3518 if (pubRes.getState().isNew()) { 3519 result.add(pubRes.getStructureId()); 3520 } 3521 if (pubRes.getState().isDeleted()) { 3522 deletedSet.add(pubRes.getStructureId()); 3523 } 3524 } 3525 result.retainAll(deletedSet); 3526 return result; 3527 } 3528 3529 /** 3530 * Shuts down the Solr core container.<p> 3531 */ 3532 private void shutDownSolrContainer() { 3533 3534 if (m_coreContainer != null) { 3535 for (SolrCore core : m_coreContainer.getCores()) { 3536 // do not unload spellcheck core because otherwise the core.properties file is removed 3537 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3538 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3539 m_coreContainer.unload(core.getName(), false, false, true); 3540 } 3541 } 3542 m_coreContainer.shutdown(); 3543 if (CmsLog.INIT.isInfoEnabled()) { 3544 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3545 } 3546 m_coreContainer = null; 3547 } 3548 } 3549 3550}