001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.configuration.CmsConfigurationException; 031import org.opencms.db.CmsDriverManager; 032import org.opencms.db.CmsPublishedResource; 033import org.opencms.db.CmsResourceState; 034import org.opencms.file.CmsObject; 035import org.opencms.file.CmsProject; 036import org.opencms.file.CmsResource; 037import org.opencms.file.CmsResourceFilter; 038import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 039import org.opencms.file.types.CmsResourceTypeXmlContent; 040import org.opencms.i18n.CmsMessageContainer; 041import org.opencms.jsp.CmsJspTagContainer; 042import org.opencms.loader.CmsLoaderException; 043import org.opencms.main.CmsEvent; 044import org.opencms.main.CmsException; 045import org.opencms.main.CmsIllegalArgumentException; 046import org.opencms.main.CmsIllegalStateException; 047import org.opencms.main.CmsLog; 048import org.opencms.main.I_CmsEventListener; 049import org.opencms.main.OpenCms; 050import org.opencms.main.OpenCmsSolrHandler; 051import org.opencms.relations.CmsRelation; 052import org.opencms.relations.CmsRelationFilter; 053import org.opencms.report.CmsLogReport; 054import org.opencms.report.I_CmsReport; 055import org.opencms.scheduler.I_CmsScheduledJob; 056import org.opencms.search.documents.A_CmsVfsDocument; 057import org.opencms.search.documents.CmsExtractionResultCache; 058import org.opencms.search.documents.I_CmsDocumentFactory; 059import org.opencms.search.documents.I_CmsTermHighlighter; 060import org.opencms.search.fields.CmsLuceneField; 061import org.opencms.search.fields.CmsLuceneFieldConfiguration; 062import org.opencms.search.fields.CmsSearchField; 063import org.opencms.search.fields.CmsSearchFieldConfiguration; 064import org.opencms.search.fields.CmsSearchFieldMapping; 065import org.opencms.search.solr.CmsSolrConfiguration; 066import org.opencms.search.solr.CmsSolrFieldConfiguration; 067import org.opencms.search.solr.CmsSolrIndex; 068import org.opencms.search.solr.CmsSolrIndexWriter; 069import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 070import org.opencms.security.CmsRole; 071import org.opencms.security.CmsRoleViolationException; 072import org.opencms.util.A_CmsModeStringEnumeration; 073import org.opencms.util.CmsStringUtil; 074import org.opencms.util.CmsUUID; 075import org.opencms.util.CmsWaitHandle; 076 077import java.io.File; 078import java.io.IOException; 079import java.nio.file.FileSystems; 080import java.nio.file.Paths; 081import java.util.ArrayList; 082import java.util.Collections; 083import java.util.HashMap; 084import java.util.HashSet; 085import java.util.Iterator; 086import java.util.List; 087import java.util.Locale; 088import java.util.Map; 089import java.util.Set; 090import java.util.TreeMap; 091import java.util.concurrent.locks.ReentrantLock; 092 093import org.apache.commons.logging.Log; 094import org.apache.lucene.analysis.Analyzer; 095import org.apache.lucene.analysis.standard.StandardAnalyzer; 096import org.apache.lucene.analysis.util.CharArraySet; 097import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 098import org.apache.solr.client.solrj.impl.HttpSolrClient; 099import org.apache.solr.core.CoreContainer; 100import org.apache.solr.core.CoreDescriptor; 101import org.apache.solr.core.SolrCore; 102 103/** 104 * Implements the general management and configuration of the search and 105 * indexing facilities in OpenCms.<p> 106 * 107 * @since 6.0.0 108 */ 109public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 110 111 /** 112 * Enumeration class for force unlock types.<p> 113 */ 114 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 115 116 /** Force unlock type "always". */ 117 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 118 119 /** Force unlock type "never". */ 120 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 121 122 /** Force unlock type "only full". */ 123 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 124 125 /** Serializable version id. */ 126 private static final long serialVersionUID = 74746076708908673L; 127 128 /** 129 * Creates a new force unlock type with the given name.<p> 130 * 131 * @param mode the mode id to use 132 */ 133 protected CmsSearchForceUnlockMode(String mode) { 134 135 super(mode); 136 } 137 138 /** 139 * Returns the lock type for the given type value.<p> 140 * 141 * @param type the type value to get the lock type for 142 * 143 * @return the lock type for the given type value 144 */ 145 public static CmsSearchForceUnlockMode valueOf(String type) { 146 147 if (type.equals(ALWAYS.toString())) { 148 return ALWAYS; 149 } else if (type.equals(NEVER.toString())) { 150 return NEVER; 151 } else { 152 return ONLYFULL; 153 } 154 } 155 } 156 157 /** 158 * Handles offline index generation.<p> 159 */ 160 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 161 162 /** Indicates if the event handlers for the offline search have been already registered. */ 163 private boolean m_isEventRegistered; 164 165 /** The list of resources to index. */ 166 private List<CmsPublishedResource> m_resourcesToIndex; 167 168 /** 169 * Initializes the offline index handler.<p> 170 */ 171 protected CmsSearchOfflineHandler() { 172 173 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 174 } 175 176 /** 177 * Implements the event listener of this class.<p> 178 * 179 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 180 */ 181 @SuppressWarnings("unchecked") 182 public void cmsEvent(CmsEvent event) { 183 184 switch (event.getType()) { 185 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 186 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 187 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 188 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 189 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 190 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 191 // skip lock & unlock 192 return; 193 } 194 // skip indexing if flag is set in event 195 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 196 if (skip != null) { 197 return; 198 } 199 200 // a resource has been modified - offline indexes require (re)indexing 201 List<CmsResource> resources = Collections.singletonList( 202 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 203 reIndexResources(resources); 204 break; 205 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 206 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 207 I_CmsEventListener.KEY_RESOURCES); 208 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 209 for (CmsResource res : resourcesToDelete) { 210 if (res.getState().isNew()) { 211 // if the resource is new and a delete action was performed 212 // --> set the state of the resource to deleted 213 res.setState(CmsResourceState.STATE_DELETED); 214 } 215 } 216 reIndexResources(resourcesToDelete); 217 break; 218 case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 219 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 220 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 221 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 222 // a list of resources has been modified - offline indexes require (re)indexing 223 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 224 break; 225 default: 226 // no operation 227 } 228 } 229 230 /** 231 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 232 * 233 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 234 */ 235 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 236 237 m_resourcesToIndex.addAll(resourcesToIndex); 238 } 239 240 /** 241 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 242 * 243 * @return the resources to index 244 */ 245 protected List<CmsPublishedResource> getResourcesToIndex() { 246 247 List<CmsPublishedResource> result; 248 synchronized (this) { 249 result = m_resourcesToIndex; 250 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 251 } 252 try { 253 CmsObject cms = m_adminCms; 254 CmsProject offline = getOfflineIndexProject(); 255 if (offline != null) { 256 // switch to the offline project if available 257 cms = OpenCms.initCmsObject(m_adminCms); 258 cms.getRequestContext().setCurrentProject(offline); 259 } 260 findRelatedContainerPages(cms, result); 261 } catch (CmsException e) { 262 LOG.error(e.getLocalizedMessage(), e); 263 } 264 return result; 265 } 266 267 /** 268 * Initializes this offline search handler, registering the event handlers if required.<p> 269 */ 270 protected void initialize() { 271 272 if (m_offlineIndexes.size() > 0) { 273 // there is at least one offline index configured 274 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 275 // create the offline indexing thread 276 m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); 277 // start the offline index thread 278 m_offlineIndexThread.start(); 279 } 280 } else { 281 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 282 // no offline indexes but thread still running, stop the thread 283 m_offlineIndexThread.shutDown(); 284 m_offlineIndexThread = null; 285 } 286 } 287 // do this only in case there are offline indexes configured 288 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 289 m_isEventRegistered = true; 290 // register this object as event listener 291 OpenCms.addCmsEventListener( 292 this, 293 new int[] { 294 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 295 I_CmsEventListener.EVENT_RESOURCE_CREATED, 296 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 297 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 298 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 299 I_CmsEventListener.EVENT_RESOURCE_MOVED, 300 I_CmsEventListener.EVENT_RESOURCE_DELETED, 301 I_CmsEventListener.EVENT_RESOURCE_COPIED, 302 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 303 } 304 } 305 306 /** 307 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 308 * 309 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 310 */ 311 protected synchronized void reIndexResources(List<CmsResource> resources) { 312 313 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 314 for (CmsResource res : resources) { 315 CmsPublishedResource pubRes = new CmsPublishedResource(res); 316 resourcesToIndex.add(pubRes); 317 } 318 if (resourcesToIndex.size() > 0) { 319 // add the resources found to the offline index thread 320 addResourcesToIndex(resourcesToIndex); 321 } 322 } 323 } 324 325 /** 326 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 327 */ 328 protected class CmsSearchOfflineIndexThread extends Thread { 329 330 /** The event handler that triggers this thread. */ 331 CmsSearchOfflineHandler m_handler; 332 333 /** Indicates if this thread is still alive. */ 334 boolean m_isAlive; 335 336 /** Indicates that an index update thread is currently running. */ 337 private boolean m_isUpdating; 338 339 /** If true a manual update (after file upload) was triggered. */ 340 private boolean m_updateTriggered; 341 342 /** The wait handle used for signalling when the worker thread has finished. */ 343 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 344 345 /** 346 * Constructor.<p> 347 * 348 * @param handler the offline index event handler 349 */ 350 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 351 352 super("OpenCms: Offline Search Indexer"); 353 m_handler = handler; 354 } 355 356 /** 357 * Gets the wait handle used for signalling when the worker thread has finished. 358 * 359 * @return the wait handle 360 **/ 361 public CmsWaitHandle getWaitHandle() { 362 363 return m_waitHandle; 364 } 365 366 /** 367 * @see java.lang.Thread#interrupt() 368 */ 369 @Override 370 public void interrupt() { 371 372 super.interrupt(); 373 m_updateTriggered = true; 374 } 375 376 /** 377 * @see java.lang.Thread#run() 378 */ 379 @Override 380 public void run() { 381 382 // create a log report for the output 383 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 384 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 385 m_updateTriggered = false; 386 try { 387 while (m_isAlive) { 388 if (!m_updateTriggered) { 389 try { 390 sleep(offlineUpdateFrequency); 391 } catch (InterruptedException e) { 392 // continue the thread after interruption 393 if (!m_isAlive) { 394 // the thread has been shut down while sleeping 395 continue; 396 } 397 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 398 // offline update frequency change - clear interrupt status 399 offlineUpdateFrequency = getOfflineUpdateFrequency(); 400 } 401 LOG.info(e.getLocalizedMessage(), e); 402 } 403 } 404 if (m_isAlive) { 405 // set update trigger to false since we do the update now 406 m_updateTriggered = false; 407 // get list of resource to update 408 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 409 if (resourcesToIndex.size() > 0) { 410 // only start indexing if there is at least one resource 411 startOfflineUpdateThread(report, resourcesToIndex); 412 } else { 413 getWaitHandle().release(); 414 } 415 // this is just called to clear the interrupt status of the thread 416 interrupted(); 417 } 418 } 419 } finally { 420 // make sure that live status is reset in case of Exceptions 421 m_isAlive = false; 422 } 423 424 } 425 426 /** 427 * @see java.lang.Thread#start() 428 */ 429 @Override 430 public synchronized void start() { 431 432 m_isAlive = true; 433 super.start(); 434 } 435 436 /** 437 * Obtains the list of resource to update in the offline index, 438 * then optimizes the list by removing duplicate entries.<p> 439 * 440 * @return the list of resource to update in the offline index 441 */ 442 protected List<CmsPublishedResource> getResourcesToIndex() { 443 444 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 445 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 446 447 // Reverse to always keep the last list entries 448 Collections.reverse(resourcesToIndex); 449 for (CmsPublishedResource pubRes : resourcesToIndex) { 450 boolean addResource = true; 451 for (CmsPublishedResource resRes : result) { 452 if (pubRes.equals(resRes) 453 && (pubRes.getState() == resRes.getState()) 454 && (pubRes.getMovedState() == resRes.getMovedState()) 455 && pubRes.getRootPath().equals(resRes.getRootPath())) { 456 // resource already in the update list 457 addResource = false; 458 break; 459 } 460 } 461 if (addResource) { 462 result.add(pubRes); 463 } 464 465 } 466 Collections.reverse(result); 467 return changeStateOfMoveOriginsToDeleted(result); 468 } 469 470 /** 471 * Shuts down this offline index thread.<p> 472 */ 473 protected void shutDown() { 474 475 m_isAlive = false; 476 interrupt(); 477 if (m_isUpdating) { 478 long waitTime = getOfflineUpdateFrequency() / 2; 479 int waitSteps = 0; 480 do { 481 try { 482 // wait half the time of the offline index frequency for the thread to finish 483 Thread.sleep(waitTime); 484 } catch (InterruptedException e) { 485 // continue 486 LOG.info(e.getLocalizedMessage(), e); 487 } 488 waitSteps++; 489 // wait 5 times then stop waiting 490 } while ((waitSteps < 5) && m_isUpdating); 491 } 492 } 493 494 /** 495 * Updates the offline search indexes for the given list of resources.<p> 496 * 497 * @param report the report to write the index information to 498 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 499 */ 500 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 501 502 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 503 long startTime = System.currentTimeMillis(); 504 long waitTime = getOfflineUpdateFrequency() / 2; 505 if (LOG.isDebugEnabled()) { 506 LOG.debug( 507 Messages.get().getBundle().key( 508 Messages.LOG_OI_UPDATE_START_1, 509 Integer.valueOf(resourcesToIndex.size()))); 510 } 511 512 m_isUpdating = true; 513 thread.start(); 514 515 do { 516 try { 517 // wait half the time of the offline index frequency for the thread to finish 518 thread.join(waitTime); 519 } catch (InterruptedException e) { 520 // continue 521 LOG.info(e.getLocalizedMessage(), e); 522 } 523 if (thread.isAlive()) { 524 LOG.warn( 525 Messages.get().getBundle().key( 526 Messages.LOG_OI_UPDATE_LONG_2, 527 Integer.valueOf(resourcesToIndex.size()), 528 Long.valueOf(System.currentTimeMillis() - startTime))); 529 } 530 } while (thread.isAlive()); 531 m_isUpdating = false; 532 533 if (LOG.isDebugEnabled()) { 534 LOG.debug( 535 Messages.get().getBundle().key( 536 Messages.LOG_OI_UPDATE_FINISH_2, 537 Integer.valueOf(resourcesToIndex.size()), 538 Long.valueOf(System.currentTimeMillis() - startTime))); 539 } 540 } 541 542 /** 543 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 544 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 545 * 546 * @param resourcesToIndex the resources to index 547 * 548 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 549 */ 550 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 551 List<CmsPublishedResource> resourcesToIndex) { 552 553 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 554 for (CmsPublishedResource resource : resourcesToIndex) { 555 if (resource.getState().isDeleted()) { 556 // we don't want the last path to be from a deleted resource 557 continue; 558 } 559 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 560 } 561 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 562 for (CmsPublishedResource resource : resourcesToIndex) { 563 if (resource.getState().isDeleted()) { 564 result.add(resource); 565 continue; 566 } 567 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 568 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 569 result.add(resource); 570 } else { 571 result.add( 572 new CmsPublishedResource( 573 resource.getStructureId(), 574 resource.getResourceId(), 575 resource.getPublishTag(), 576 resource.getRootPath(), 577 resource.getType(), 578 resource.isFolder(), 579 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 580 resource.getSiblingCount())); 581 } 582 } 583 return result; 584 } 585 } 586 587 /** 588 * An offline index worker Thread runs each time for every offline index update action.<p> 589 * 590 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 591 * problems if a single operation "hangs" the Tread.<p> 592 */ 593 protected class CmsSearchOfflineIndexWorkThread extends Thread { 594 595 /** The report to write the index information to. */ 596 I_CmsReport m_report; 597 598 /** The list of {@link CmsPublishedResource} objects to index. */ 599 List<CmsPublishedResource> m_resourcesToIndex; 600 601 /** 602 * Updates the offline search indexes for the given list of resources.<p> 603 * 604 * @param report the report to write the index information to 605 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 606 */ 607 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 608 609 super("OpenCms: Offline Search Index Worker"); 610 m_report = report; 611 m_resourcesToIndex = resourcesToIndex; 612 } 613 614 /** 615 * @see java.lang.Thread#run() 616 */ 617 @Override 618 public void run() { 619 620 updateIndexOffline(m_report, m_resourcesToIndex); 621 if (m_offlineIndexThread != null) { 622 m_offlineIndexThread.getWaitHandle().release(); 623 } 624 } 625 } 626 627 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 628 private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true); 629 630 /** The default value used for generating search result excerpts (1024 chars). */ 631 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 632 633 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 634 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 635 636 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */ 637 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 638 639 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 640 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 641 642 /** The default maximal wait time for re-indexing after editing a content. */ 643 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 644 645 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 646 public static final int DEFAULT_TIMEOUT = 60000; 647 648 /** Scheduler parameter: Update only a specified list of indexes. */ 649 public static final String JOB_PARAM_INDEXLIST = "indexList"; 650 651 /** Scheduler parameter: Write the output of the update to the logfile. */ 652 public static final String JOB_PARAM_WRITELOG = "writeLog"; 653 654 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 655 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 656 657 /** The log object for this class. */ 658 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 659 660 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 661 protected CmsObject m_adminCms; 662 663 /** The list of indexes that are configured for offline index mode. */ 664 protected List<CmsSearchIndex> m_offlineIndexes; 665 666 /** The thread used of offline indexing. */ 667 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 668 669 /** Configured analyzers for languages using <analyzer>. */ 670 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 671 672 /** Stores the offline update frequency while indexing is paused. */ 673 private long m_configuredOfflineIndexingFrequency; 674 675 /** The Solr core container. */ 676 private CoreContainer m_coreContainer; 677 678 /** A map of document factory configurations. */ 679 private List<CmsSearchDocumentType> m_documentTypeConfigs; 680 681 /** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */ 682 private Map<String, I_CmsDocumentFactory> m_documentTypes; 683 684 /** The max age for extraction results to remain in the cache. */ 685 private float m_extractionCacheMaxAge; 686 687 /** The cache for the extraction results. */ 688 private CmsExtractionResultCache m_extractionResultCache; 689 690 /** Contains the available field configurations. */ 691 private Map<String, CmsSearchFieldConfiguration> m_fieldConfigurations; 692 693 /** The force unlock type. */ 694 private CmsSearchForceUnlockMode m_forceUnlockMode; 695 696 /** The class used to highlight the search terms in the excerpt of a search result. */ 697 private I_CmsTermHighlighter m_highlighter; 698 699 /** A list of search indexes. */ 700 private List<CmsSearchIndex> m_indexes; 701 702 /** Seconds to wait for an index lock. */ 703 private int m_indexLockMaxWaitSeconds = 10; 704 705 /** Configured index sources. */ 706 private Map<String, CmsSearchIndexSource> m_indexSources; 707 708 /** The max. char. length of the excerpt in the search result. */ 709 private int m_maxExcerptLength; 710 711 /** The maximum number of modifications before a commit in the search index is triggered. */ 712 private int m_maxModificationsBeforeCommit; 713 714 /** The offline index search handler. */ 715 private CmsSearchOfflineHandler m_offlineHandler; 716 717 /** The update frequency of the offline indexer in milliseconds. */ 718 private long m_offlineUpdateFrequency; 719 720 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 721 private long m_maxIndexWaitTime; 722 723 /** Path to index files below WEB-INF/. */ 724 private String m_path; 725 726 /** The Solr configuration. */ 727 private CmsSolrConfiguration m_solrConfig; 728 729 /** Timeout for abandoning indexing thread. */ 730 private long m_timeout; 731 732 /** 733 * Default constructor when called as cron job.<p> 734 */ 735 public CmsSearchManager() { 736 737 m_documentTypes = new HashMap<String, I_CmsDocumentFactory>(); 738 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 739 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 740 m_indexes = new ArrayList<CmsSearchIndex>(); 741 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 742 m_offlineHandler = new CmsSearchOfflineHandler(); 743 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 744 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 745 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 746 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 747 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 748 749 m_fieldConfigurations = new HashMap<String, CmsSearchFieldConfiguration>(); 750 // make sure we have a "standard" field configuration 751 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 752 753 if (CmsLog.INIT.isInfoEnabled()) { 754 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 755 } 756 } 757 758 /** 759 * Returns an analyzer for the given class name.<p> 760 * 761 * @param className the class name of the analyzer 762 * 763 * @return the appropriate lucene analyzer 764 * 765 * @throws Exception if something goes wrong 766 */ 767 public static Analyzer getAnalyzer(String className) throws Exception { 768 769 Analyzer analyzer = null; 770 Class<?> analyzerClass; 771 try { 772 analyzerClass = Class.forName(className); 773 } catch (ClassNotFoundException e) { 774 // allow Lucene standard classes to be written in a short form 775 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 776 } 777 778 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 779 if (StandardAnalyzer.class.equals(analyzerClass)) { 780 // the Lucene standard analyzer is used - but without any stopwords. 781 // TODO: Is it a good idea to remove the default english stopwords used by default? 782 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 783 } else { 784 analyzer = (Analyzer)analyzerClass.newInstance(); 785 } 786 return analyzer; 787 } 788 789 /** 790 * Returns the Solr index configured with the parameters name. 791 * The parameters must contain a key/value pair with an existing 792 * Solr index, otherwise <code>null</code> is returned.<p> 793 * 794 * @param cms the current context 795 * @param params the parameter map 796 * 797 * @return the best matching Solr index 798 */ 799 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 800 801 String indexName = null; 802 CmsSolrIndex index = null; 803 // try to get the index name from the parameters: 'core' or 'index' 804 if (params != null) { 805 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 806 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 807 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 808 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 809 : null); 810 } 811 if (indexName == null) { 812 // if no parameter is specified try to use the default online/offline indexes by context 813 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 814 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 815 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 816 } 817 // try to get the index 818 index = indexName != null ? OpenCms.getSearchManager().getIndexSolr(indexName) : null; 819 if (index == null) { 820 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 821 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 822 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 823 index = solrs.get(0); 824 } 825 } 826 return index; 827 } 828 829 /** 830 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 831 * 832 * @param indexName the name of the index to check 833 * 834 * @return <code>true</code> if the index for the given name is a Lucene index 835 */ 836 public static boolean isLuceneIndex(String indexName) { 837 838 CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 839 if (i instanceof CmsSolrIndex) { 840 return false; 841 } 842 return true; 843 } 844 845 /** 846 * Adds an analyzer.<p> 847 * 848 * @param analyzer an analyzer 849 */ 850 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 851 852 m_analyzers.put(analyzer.getLocale(), analyzer); 853 854 if (CmsLog.INIT.isInfoEnabled()) { 855 CmsLog.INIT.info( 856 Messages.get().getBundle().key( 857 Messages.INIT_ADD_ANALYZER_2, 858 analyzer.getLocale(), 859 analyzer.getClassName())); 860 } 861 } 862 863 /** 864 * Adds a document type.<p> 865 * 866 * @param documentType a document type 867 */ 868 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 869 870 m_documentTypeConfigs.add(documentType); 871 872 if (CmsLog.INIT.isInfoEnabled()) { 873 CmsLog.INIT.info( 874 Messages.get().getBundle().key( 875 Messages.INIT_SEARCH_DOC_TYPES_2, 876 documentType.getName(), 877 documentType.getClassName())); 878 } 879 } 880 881 /** 882 * Adds a search field configuration to the search manager.<p> 883 * 884 * @param fieldConfiguration the search field configuration to add 885 */ 886 public void addFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration) { 887 888 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 889 if (fieldConfiguration.getFields().isEmpty()) { 890 LOG.debug( 891 Messages.get().getBundle().key( 892 Messages.LOG_FIELD_CONFIGURATION_IS_EMPTY_1, 893 fieldConfiguration.getName())); 894 } 895 } 896 897 /** 898 * Adds a search index to the configuration.<p> 899 * 900 * @param searchIndex the search index to add 901 */ 902 public void addSearchIndex(CmsSearchIndex searchIndex) { 903 904 if ((searchIndex.getSources() == null) || (searchIndex.getPath() == null)) { 905 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 906 try { 907 searchIndex.initialize(); 908 } catch (CmsException e) { 909 // should never happen 910 LOG.error(e.getMessage(), e); 911 } 912 } 913 } 914 915 // name: not null or emtpy and unique 916 String name = searchIndex.getName(); 917 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 918 throw new CmsIllegalArgumentException( 919 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 920 } 921 if (m_indexSources.keySet().contains(name)) { 922 throw new CmsIllegalArgumentException( 923 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 924 } 925 926 m_indexes.add(searchIndex); 927 if (m_adminCms != null) { 928 initOfflineIndexes(); 929 } 930 931 if (CmsLog.INIT.isInfoEnabled()) { 932 CmsLog.INIT.info( 933 Messages.get().getBundle().key( 934 Messages.INIT_ADD_SEARCH_INDEX_2, 935 searchIndex.getName(), 936 searchIndex.getProject())); 937 } 938 } 939 940 /** 941 * Adds a search index source configuration.<p> 942 * 943 * @param searchIndexSource a search index source configuration 944 */ 945 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 946 947 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 948 949 if (CmsLog.INIT.isInfoEnabled()) { 950 CmsLog.INIT.info( 951 Messages.get().getBundle().key( 952 Messages.INIT_SEARCH_INDEX_SOURCE_2, 953 searchIndexSource.getName(), 954 searchIndexSource.getIndexerClassName())); 955 } 956 } 957 958 /** 959 * Implements the event listener of this class.<p> 960 * 961 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 962 */ 963 public void cmsEvent(CmsEvent event) { 964 965 switch (event.getType()) { 966 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 967 List<String> indexNames = null; 968 if ((event.getData() != null) 969 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 970 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 971 indexNames = CmsStringUtil.splitAsList( 972 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 973 ",", 974 true); 975 } 976 try { 977 if (LOG.isDebugEnabled()) { 978 LOG.debug( 979 Messages.get().getBundle().key( 980 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 981 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 982 new Exception()); 983 } 984 if (indexNames == null) { 985 rebuildAllIndexes(getEventReport(event)); 986 } else { 987 rebuildIndexes(indexNames, getEventReport(event)); 988 } 989 } catch (CmsException e) { 990 if (LOG.isErrorEnabled()) { 991 LOG.error( 992 Messages.get().getBundle().key( 993 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 994 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 995 e); 996 } 997 } 998 break; 999 case I_CmsEventListener.EVENT_CLEAR_CACHES: 1000 if (LOG.isDebugEnabled()) { 1001 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 1002 } 1003 break; 1004 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 1005 // event data contains a list of the published resources 1006 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1007 if (LOG.isDebugEnabled()) { 1008 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1009 } 1010 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1011 if (LOG.isDebugEnabled()) { 1012 LOG.debug( 1013 Messages.get().getBundle().key( 1014 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1015 publishHistoryId)); 1016 } 1017 break; 1018 default: 1019 // no operation 1020 } 1021 } 1022 1023 /** 1024 * Returns all Solr index.<p> 1025 * 1026 * @return all Solr indexes 1027 */ 1028 public List<CmsSolrIndex> getAllSolrIndexes() { 1029 1030 List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1031 for (String indexName : getIndexNames()) { 1032 CmsSolrIndex index = getIndexSolr(indexName); 1033 if (index != null) { 1034 result.add(index); 1035 } 1036 } 1037 return result; 1038 } 1039 1040 /** 1041 * Returns an analyzer for the given language.<p> 1042 * 1043 * The analyzer is selected according to the analyzer configuration.<p> 1044 * 1045 * @param locale the locale to get the analyzer for 1046 * @return the appropriate lucene analyzer 1047 * 1048 * @throws CmsSearchException if something goes wrong 1049 */ 1050 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1051 1052 Analyzer analyzer = null; 1053 String className = null; 1054 1055 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1056 if (analyzerConf == null) { 1057 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1058 } 1059 1060 try { 1061 analyzer = getAnalyzer(analyzerConf.getClassName()); 1062 } catch (Exception e) { 1063 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1064 } 1065 1066 return analyzer; 1067 } 1068 1069 /** 1070 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1071 * 1072 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 1073 * 1074 * @return an unmodifiable view of the Analyzers Map 1075 */ 1076 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1077 1078 return Collections.unmodifiableMap(m_analyzers); 1079 } 1080 1081 /** 1082 * Returns the search analyzer for the given locale.<p> 1083 * 1084 * @param locale the locale to get the analyzer for 1085 * 1086 * @return the search analyzer for the given locale 1087 */ 1088 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1089 1090 return m_analyzers.get(locale); 1091 } 1092 1093 /** 1094 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1095 * 1096 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1097 */ 1098 public String getDirectory() { 1099 1100 return m_path; 1101 } 1102 1103 /** 1104 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1105 * 1106 * @return the Solr home directory 1107 */ 1108 public String getDirectorySolr() { 1109 1110 return m_solrConfig != null ? m_solrConfig.getHome() : null; 1111 } 1112 1113 /** 1114 * Returns a lucene document factory for given resource.<p> 1115 * 1116 * The type of the document factory is selected by the type of the resource 1117 * and the MIME type of the resource content, according to the configuration in <code>opencms-search.xml</code>.<p> 1118 * 1119 * @param resource a cms resource 1120 * @return a lucene document factory or null 1121 */ 1122 public I_CmsDocumentFactory getDocumentFactory(CmsResource resource) { 1123 1124 // first get the MIME type of the resource 1125 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1126 String resourceType = null; 1127 try { 1128 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1129 } catch (CmsLoaderException e) { 1130 // ignore, unknown resource type, resource can not be indexed 1131 LOG.info(e.getLocalizedMessage(), e); 1132 } 1133 return getDocumentFactory(resourceType, mimeType); 1134 } 1135 1136 /** 1137 * Returns a lucene document factory for given resource type and MIME type.<p> 1138 * 1139 * The type of the document factory is selected according to the configuration 1140 * in <code>opencms-search.xml</code>.<p> 1141 * 1142 * @param resourceType the resource type name 1143 * @param mimeType the MIME type 1144 * 1145 * @return a lucene document factory or null in case no matching factory was found 1146 */ 1147 public I_CmsDocumentFactory getDocumentFactory(String resourceType, String mimeType) { 1148 1149 I_CmsDocumentFactory result = null; 1150 if (resourceType != null) { 1151 // create the factory lookup key for the document 1152 String documentTypeKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1153 // check if a setting is available for this specific MIME type 1154 result = m_documentTypes.get(documentTypeKey); 1155 if (result == null) { 1156 // no setting is available, try to use a generic setting without MIME type 1157 result = m_documentTypes.get(A_CmsVfsDocument.getDocumentKey(resourceType, null)); 1158 // please note: the result may still be null 1159 } 1160 } 1161 return result; 1162 } 1163 1164 /** 1165 * Returns a document type config.<p> 1166 * 1167 * @param name the name of the document type config 1168 * @return the document type config. 1169 */ 1170 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1171 1172 // this is really used only for the search manager GUI, 1173 // so performance is not an issue and no lookup map is generated 1174 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1175 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1176 if (type.getName().equals(name)) { 1177 return type; 1178 } 1179 } 1180 return null; 1181 } 1182 1183 /** 1184 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1185 * 1186 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1187 */ 1188 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1189 1190 return Collections.unmodifiableList(m_documentTypeConfigs); 1191 } 1192 1193 /** 1194 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1195 * 1196 * @return the maximum age a text extraction result is kept in the cache (in hours) 1197 */ 1198 public float getExtractionCacheMaxAge() { 1199 1200 return m_extractionCacheMaxAge; 1201 } 1202 1203 /** 1204 * Returns the search field configuration with the given name.<p> 1205 * 1206 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1207 * 1208 * @param name the name to get the search field configuration for 1209 * 1210 * @return the search field configuration with the given name 1211 */ 1212 public CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1213 1214 return m_fieldConfigurations.get(name); 1215 } 1216 1217 /** 1218 * Returns the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries.<p> 1219 * 1220 * @return the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries 1221 */ 1222 public List<CmsSearchFieldConfiguration> getFieldConfigurations() { 1223 1224 List<CmsSearchFieldConfiguration> result = new ArrayList<CmsSearchFieldConfiguration>( 1225 m_fieldConfigurations.values()); 1226 Collections.sort(result); 1227 return Collections.unmodifiableList(result); 1228 } 1229 1230 /** 1231 * Returns the Lucene search field configurations only.<p> 1232 * 1233 * @return the Lucene search field configurations 1234 */ 1235 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1236 1237 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1238 for (CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1239 if (conf instanceof CmsLuceneFieldConfiguration) { 1240 result.add((CmsLuceneFieldConfiguration)conf); 1241 } 1242 } 1243 Collections.sort(result); 1244 return Collections.unmodifiableList(result); 1245 } 1246 1247 /** 1248 * Returns the Solr search field configurations only.<p> 1249 * 1250 * @return the Solr search field configurations 1251 */ 1252 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1253 1254 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1255 for (CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1256 if (conf instanceof CmsSolrFieldConfiguration) { 1257 result.add((CmsSolrFieldConfiguration)conf); 1258 } 1259 } 1260 Collections.sort(result); 1261 return Collections.unmodifiableList(result); 1262 } 1263 1264 /** 1265 * Returns the force unlock mode during indexing.<p> 1266 * 1267 * @return the force unlock mode during indexing 1268 */ 1269 public CmsSearchForceUnlockMode getForceunlock() { 1270 1271 return m_forceUnlockMode; 1272 } 1273 1274 /** 1275 * Returns the highlighter.<p> 1276 * 1277 * @return the highlighter 1278 */ 1279 public I_CmsTermHighlighter getHighlighter() { 1280 1281 return m_highlighter; 1282 } 1283 1284 /** 1285 * Returns the Lucene search index configured with the given name.<p> 1286 * The index must exist, otherwise <code>null</code> is returned. 1287 * 1288 * @param indexName then name of the requested search index 1289 * 1290 * @return the Lucene search index configured with the given name 1291 */ 1292 public CmsSearchIndex getIndex(String indexName) { 1293 1294 for (CmsSearchIndex index : m_indexes) { 1295 if (indexName.equalsIgnoreCase(index.getName())) { 1296 return index; 1297 } 1298 } 1299 return null; 1300 } 1301 1302 /** 1303 * Returns the seconds to wait for an index lock during an update operation.<p> 1304 * 1305 * @return the seconds to wait for an index lock during an update operation 1306 */ 1307 public int getIndexLockMaxWaitSeconds() { 1308 1309 return m_indexLockMaxWaitSeconds; 1310 } 1311 1312 /** 1313 * Returns the names of all configured indexes.<p> 1314 * 1315 * @return list of names 1316 */ 1317 public List<String> getIndexNames() { 1318 1319 List<String> indexNames = new ArrayList<String>(); 1320 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1321 indexNames.add((m_indexes.get(i)).getName()); 1322 } 1323 1324 return indexNames; 1325 } 1326 1327 /** 1328 * Returns the Solr index configured with the given name.<p> 1329 * The index must exist, otherwise <code>null</code> is returned. 1330 * 1331 * @param indexName then name of the requested Solr index 1332 * @return the Solr index configured with the given name 1333 */ 1334 public CmsSolrIndex getIndexSolr(String indexName) { 1335 1336 CmsSearchIndex index = getIndex(indexName); 1337 if (index instanceof CmsSolrIndex) { 1338 return (CmsSolrIndex)index; 1339 } 1340 return null; 1341 } 1342 1343 /** 1344 * Returns a search index source for a specified source name.<p> 1345 * 1346 * @param sourceName the name of the index source 1347 * @return a search index source 1348 */ 1349 public CmsSearchIndexSource getIndexSource(String sourceName) { 1350 1351 return m_indexSources.get(sourceName); 1352 } 1353 1354 /** 1355 * Returns the max. excerpt length.<p> 1356 * 1357 * @return the max excerpt length 1358 */ 1359 public int getMaxExcerptLength() { 1360 1361 return m_maxExcerptLength; 1362 } 1363 1364 /** 1365 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1366 * 1367 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1368 */ 1369 public long getMaxIndexWaitTime() { 1370 1371 return m_maxIndexWaitTime; 1372 } 1373 1374 /** 1375 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1376 * 1377 * @return the maximum number of modifications before a commit in the search index is triggered 1378 */ 1379 public int getMaxModificationsBeforeCommit() { 1380 1381 return m_maxModificationsBeforeCommit; 1382 } 1383 1384 /** 1385 * Returns the update frequency of the offline indexer in milliseconds.<p> 1386 * 1387 * @return the update frequency of the offline indexer in milliseconds 1388 */ 1389 public long getOfflineUpdateFrequency() { 1390 1391 return m_offlineUpdateFrequency; 1392 } 1393 1394 /** 1395 * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p> 1396 * 1397 * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances 1398 */ 1399 public List<CmsSearchIndex> getSearchIndexes() { 1400 1401 return Collections.unmodifiableList(m_indexes); 1402 } 1403 1404 /** 1405 * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p> 1406 * 1407 * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances 1408 */ 1409 public List<CmsSearchIndex> getSearchIndexesAll() { 1410 1411 return Collections.unmodifiableList(m_indexes); 1412 } 1413 1414 /** 1415 * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p> 1416 * 1417 * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances 1418 */ 1419 public List<CmsSolrIndex> getSearchIndexesSolr() { 1420 1421 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1422 for (CmsSearchIndex index : m_indexes) { 1423 if (index instanceof CmsSolrIndex) { 1424 indexes.add((CmsSolrIndex)index); 1425 } 1426 } 1427 return Collections.unmodifiableList(indexes); 1428 } 1429 1430 /** 1431 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1432 * 1433 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1434 */ 1435 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1436 1437 return Collections.unmodifiableMap(m_indexSources); 1438 } 1439 1440 /** 1441 * Return singleton instance of the OpenCms spellchecker.<p> 1442 * 1443 * @param cms the cms object. 1444 * 1445 * @return instance of CmsSolrSpellchecker. 1446 */ 1447 public CmsSolrSpellchecker getSolrDictionary(CmsObject cms) { 1448 1449 // get the core container that contains one core for each configured index 1450 if (m_coreContainer == null) { 1451 m_coreContainer = createCoreContainer(); 1452 } 1453 SolrCore spellcheckCore = m_coreContainer.getCore(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE); 1454 if (spellcheckCore == null) { 1455 LOG.error( 1456 Messages.get().getBundle().key( 1457 Messages.ERR_SPELLCHECK_CORE_NOT_AVAILABLE_1, 1458 CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)); 1459 return null; 1460 } else { 1461 return CmsSolrSpellchecker.getInstance(m_coreContainer, spellcheckCore); 1462 } 1463 } 1464 1465 /** 1466 * Returns the Solr configuration.<p> 1467 * 1468 * @return the Solr configuration 1469 */ 1470 public CmsSolrConfiguration getSolrServerConfiguration() { 1471 1472 return m_solrConfig; 1473 } 1474 1475 /** 1476 * Returns the timeout to abandon threads indexing a resource.<p> 1477 * 1478 * @return the timeout to abandon threads indexing a resource 1479 */ 1480 public long getTimeout() { 1481 1482 return m_timeout; 1483 } 1484 1485 /** 1486 * Initializes the search manager.<p> 1487 * 1488 * @param cms the cms object 1489 * 1490 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1491 */ 1492 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1493 1494 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1495 try { 1496 // store the Admin cms to index Cms resources 1497 m_adminCms = OpenCms.initCmsObject(cms); 1498 } catch (CmsException e) { 1499 // this should never happen 1500 LOG.error(e.getLocalizedMessage(), e); 1501 } 1502 // make sure the site root is the root site 1503 m_adminCms.getRequestContext().setSiteRoot("/"); 1504 1505 // create the extraction result cache 1506 m_extractionResultCache = new CmsExtractionResultCache( 1507 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1508 "/extractCache"); 1509 initializeIndexes(); 1510 initOfflineIndexes(); 1511 1512 // register this object as event listener 1513 OpenCms.addCmsEventListener( 1514 this, 1515 new int[] { 1516 I_CmsEventListener.EVENT_CLEAR_CACHES, 1517 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 1518 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES}); 1519 } 1520 1521 /** 1522 * Initializes all configured document types and search indexes.<p> 1523 * 1524 * This methods needs to be called if after a change in the index configuration has been made. 1525 */ 1526 public void initializeIndexes() { 1527 1528 initAvailableDocumentTypes(); 1529 initSearchIndexes(); 1530 } 1531 1532 /** 1533 * Initialize the offline index handler, require after an offline index has been added.<p> 1534 */ 1535 public void initOfflineIndexes() { 1536 1537 // check which indexes are configured as offline indexes 1538 List<CmsSearchIndex> offlineIndexes = new ArrayList<CmsSearchIndex>(); 1539 Iterator<CmsSearchIndex> i = m_indexes.iterator(); 1540 while (i.hasNext()) { 1541 CmsSearchIndex index = i.next(); 1542 if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1543 // this is an offline index 1544 offlineIndexes.add(index); 1545 } 1546 } 1547 m_offlineIndexes = offlineIndexes; 1548 m_offlineHandler.initialize(); 1549 1550 } 1551 1552 /** 1553 * Returns if the offline indexing is paused.<p> 1554 * 1555 * @return <code>true</code> if the offline indexing is paused 1556 */ 1557 public boolean isOfflineIndexingPaused() { 1558 1559 return m_offlineUpdateFrequency == Long.MAX_VALUE; 1560 } 1561 1562 /** 1563 * Updates the indexes from as a scheduled job.<p> 1564 * 1565 * @param cms the OpenCms user context to use when reading resources from the VFS 1566 * @param parameters the parameters for the scheduled job 1567 * 1568 * @throws Exception if something goes wrong 1569 * 1570 * @return the String to write in the scheduler log 1571 * 1572 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 1573 */ 1574 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 1575 1576 CmsSearchManager manager = OpenCms.getSearchManager(); 1577 1578 I_CmsReport report = null; 1579 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 1580 1581 if (writeLog) { 1582 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 1583 } 1584 1585 List<String> updateList = null; 1586 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 1587 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 1588 // index list has been provided as job parameter 1589 updateList = new ArrayList<String>(); 1590 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 1591 for (int i = 0; i < indexNames.length; i++) { 1592 // check if the index actually exists 1593 if (manager.getIndex(indexNames[i]) != null) { 1594 updateList.add(indexNames[i]); 1595 } else { 1596 if (LOG.isWarnEnabled()) { 1597 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 1598 } 1599 } 1600 } 1601 } 1602 1603 long startTime = System.currentTimeMillis(); 1604 1605 if (updateList == null) { 1606 // all indexes need to be updated 1607 manager.rebuildAllIndexes(report); 1608 } else { 1609 // rebuild only the selected indexes 1610 manager.rebuildIndexes(updateList, report); 1611 } 1612 1613 long runTime = System.currentTimeMillis() - startTime; 1614 1615 String finishMessage = Messages.get().getBundle().key( 1616 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 1617 CmsStringUtil.formatRuntime(runTime)); 1618 1619 if (LOG.isInfoEnabled()) { 1620 LOG.info(finishMessage); 1621 } 1622 return finishMessage; 1623 } 1624 1625 /** 1626 * Pauses the offline indexing.<p> 1627 * May take some time, because the indexes are updated first.<p> 1628 */ 1629 public void pauseOfflineIndexing() { 1630 1631 if (m_offlineUpdateFrequency != Long.MAX_VALUE) { 1632 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 1633 m_offlineUpdateFrequency = Long.MAX_VALUE; 1634 updateOfflineIndexes(0); 1635 } 1636 } 1637 1638 /** 1639 * Rebuilds (if required creates) all configured indexes.<p> 1640 * 1641 * @param report the report object to write messages (or <code>null</code>) 1642 * 1643 * @throws CmsException if something goes wrong 1644 */ 1645 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 1646 1647 try { 1648 SEARCH_MANAGER_LOCK.lock(); 1649 1650 CmsMessageContainer container = null; 1651 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1652 // iterate all configured search indexes 1653 CmsSearchIndex searchIndex = m_indexes.get(i); 1654 try { 1655 // update the index 1656 updateIndex(searchIndex, report, null); 1657 } catch (CmsException e) { 1658 container = new CmsMessageContainer( 1659 Messages.get(), 1660 Messages.ERR_INDEX_REBUILD_ALL_1, 1661 new Object[] {searchIndex.getName()}); 1662 LOG.error( 1663 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 1664 e); 1665 } 1666 } 1667 // clean up the extraction result cache 1668 cleanExtractionCache(); 1669 if (container != null) { 1670 // throw stored exception 1671 throw new CmsSearchException(container); 1672 } 1673 } finally { 1674 SEARCH_MANAGER_LOCK.unlock(); 1675 } 1676 } 1677 1678 /** 1679 * Rebuilds (if required creates) the index with the given name.<p> 1680 * 1681 * @param indexName the name of the index to rebuild 1682 * @param report the report object to write messages (or <code>null</code>) 1683 * 1684 * @throws CmsException if something goes wrong 1685 */ 1686 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 1687 1688 try { 1689 SEARCH_MANAGER_LOCK.lock(); 1690 // get the search index by name 1691 CmsSearchIndex index = getIndex(indexName); 1692 // update the index 1693 updateIndex(index, report, null); 1694 // clean up the extraction result cache 1695 cleanExtractionCache(); 1696 } finally { 1697 SEARCH_MANAGER_LOCK.unlock(); 1698 } 1699 } 1700 1701 /** 1702 * Rebuilds (if required creates) the List of indexes with the given name.<p> 1703 * 1704 * @param indexNames the names (String) of the index to rebuild 1705 * @param report the report object to write messages (or <code>null</code>) 1706 * 1707 * @throws CmsException if something goes wrong 1708 */ 1709 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 1710 1711 try { 1712 SEARCH_MANAGER_LOCK.lock(); 1713 Iterator<String> i = indexNames.iterator(); 1714 while (i.hasNext()) { 1715 String indexName = i.next(); 1716 // get the search index by name 1717 CmsSearchIndex index = getIndex(indexName); 1718 if (index != null) { 1719 // update the index 1720 updateIndex(index, report, null); 1721 } else { 1722 if (LOG.isWarnEnabled()) { 1723 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1724 } 1725 } 1726 } 1727 // clean up the extraction result cache 1728 cleanExtractionCache(); 1729 } finally { 1730 SEARCH_MANAGER_LOCK.unlock(); 1731 } 1732 } 1733 1734 /** 1735 * Registers a new Solr core for the given index.<p> 1736 * 1737 * @param index the index to register a new Solr core for 1738 * 1739 * @throws CmsConfigurationException if no Solr server is configured 1740 */ 1741 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 1742 1743 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 1744 // No solr server configured 1745 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 1746 } 1747 1748 if (m_solrConfig.getServerUrl() != null) { 1749 // HTTP Server configured 1750 // TODO Implement multi core support for HTTP server 1751 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 1752 index.setSolrServer(new HttpSolrClient(m_solrConfig.getServerUrl())); 1753 } 1754 1755 // get the core container that contains one core for each configured index 1756 if (m_coreContainer == null) { 1757 m_coreContainer = createCoreContainer(); 1758 } 1759 1760 // create a new core if no core exists for the given index 1761 if (!m_coreContainer.getCoreNames().contains(index.getCoreName())) { 1762 // Being sure the core container is not 'null', 1763 // we can create a core for this index if not already existent 1764 File dataDir = new File(index.getPath()); 1765 if (!dataDir.exists()) { 1766 dataDir.mkdirs(); 1767 if (CmsLog.INIT.isInfoEnabled()) { 1768 CmsLog.INIT.info( 1769 Messages.get().getBundle().key( 1770 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 1771 index.getName(), 1772 index.getPath())); 1773 } 1774 } 1775 File instanceDir = new File( 1776 m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 1777 if (!instanceDir.exists()) { 1778 instanceDir.mkdirs(); 1779 if (CmsLog.INIT.isInfoEnabled()) { 1780 CmsLog.INIT.info( 1781 Messages.get().getBundle().key( 1782 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 1783 index.getName(), 1784 index.getPath())); 1785 } 1786 } 1787 1788 // create the core 1789 // TODO: suboptimal - forces always the same schema 1790 SolrCore core = null; 1791 try { 1792 // creation includes registration. 1793 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 1794 Map<String, String> properties = new HashMap<String, String>(3); 1795 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 1796 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 1797 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties); 1798 } catch (NullPointerException e) { 1799 if (core != null) { 1800 core.close(); 1801 } 1802 throw new CmsConfigurationException( 1803 Messages.get().container( 1804 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 1805 index.getName() + " (" + index.getCoreName() + ")", 1806 index.getPath(), 1807 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 1808 e); 1809 } 1810 } 1811 if (index.isNoSolrServerSet()) { 1812 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 1813 } 1814 if (CmsLog.INIT.isInfoEnabled()) { 1815 CmsLog.INIT.info( 1816 Messages.get().getBundle().key( 1817 Messages.INIT_SOLR_SERVER_CREATED_1, 1818 index.getName() + " (" + index.getCoreName() + ")")); 1819 } 1820 } 1821 1822 /** 1823 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 1824 * 1825 * @param fieldConfiguration the field configuration to remove from the configuration 1826 * 1827 * @return true if remove was successful, false if preconditions for removal are ok but the given 1828 * field configuration was unknown to the manager. 1829 * 1830 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 1831 * <code>{@link CmsSearchIndex}</code>. 1832 * 1833 */ 1834 public boolean removeSearchFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration) 1835 throws CmsIllegalStateException { 1836 1837 // never remove the standard field configuration 1838 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 1839 throw new CmsIllegalStateException( 1840 Messages.get().container( 1841 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 1842 fieldConfiguration.getName())); 1843 } 1844 // validation if removal will be granted 1845 Iterator<CmsSearchIndex> itIndexes = m_indexes.iterator(); 1846 CmsSearchIndex idx; 1847 // the list for collecting indexes that use the given field configuration 1848 List<CmsSearchIndex> referrers = new ArrayList<CmsSearchIndex>(); 1849 CmsSearchFieldConfiguration refFieldConfig; 1850 while (itIndexes.hasNext()) { 1851 idx = itIndexes.next(); 1852 refFieldConfig = idx.getFieldConfiguration(); 1853 if (refFieldConfig.equals(fieldConfiguration)) { 1854 referrers.add(idx); 1855 } 1856 } 1857 if (referrers.size() > 0) { 1858 throw new CmsIllegalStateException( 1859 Messages.get().container( 1860 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 1861 fieldConfiguration.getName(), 1862 referrers.toString())); 1863 } 1864 1865 // remove operation (no exception) 1866 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 1867 1868 } 1869 1870 /** 1871 * Removes a search field from the field configuration.<p> 1872 * 1873 * @param fieldConfiguration the field configuration 1874 * @param field field to remove from the field configuration 1875 * 1876 * @return true if remove was successful, false if preconditions for removal are ok but the given 1877 * field was unknown. 1878 * 1879 * @throws CmsIllegalStateException if the given field is the last field inside the given field configuration. 1880 */ 1881 public boolean removeSearchFieldConfigurationField( 1882 CmsSearchFieldConfiguration fieldConfiguration, 1883 CmsSearchField field) 1884 throws CmsIllegalStateException { 1885 1886 if (fieldConfiguration.getFields().size() < 2) { 1887 throw new CmsIllegalStateException( 1888 Messages.get().container( 1889 Messages.ERR_CONFIGURATION_FIELD_DELETE_2, 1890 field.getName(), 1891 fieldConfiguration.getName())); 1892 } else { 1893 1894 if (LOG.isInfoEnabled()) { 1895 LOG.info( 1896 Messages.get().getBundle().key( 1897 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 1898 field.getName(), 1899 fieldConfiguration.getName())); 1900 } 1901 1902 return fieldConfiguration.getFields().remove(field); 1903 } 1904 } 1905 1906 /** 1907 * Removes a search field mapping from the given field.<p> 1908 * 1909 * @param field the field 1910 * @param mapping mapping to remove from the field 1911 * 1912 * @return true if remove was successful, false if preconditions for removal are ok but the given 1913 * mapping was unknown. 1914 * 1915 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 1916 */ 1917 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 1918 throws CmsIllegalStateException { 1919 1920 if (field.getMappings().size() < 2) { 1921 throw new CmsIllegalStateException( 1922 Messages.get().container( 1923 Messages.ERR_FIELD_MAPPING_DELETE_2, 1924 mapping.getType().toString(), 1925 field.getName())); 1926 } else { 1927 1928 if (LOG.isInfoEnabled()) { 1929 LOG.info( 1930 Messages.get().getBundle().key( 1931 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 1932 mapping.toString(), 1933 field.getName())); 1934 } 1935 return field.getMappings().remove(mapping); 1936 } 1937 } 1938 1939 /** 1940 * Removes a search index from the configuration.<p> 1941 * 1942 * @param searchIndex the search index to remove 1943 */ 1944 public void removeSearchIndex(CmsSearchIndex searchIndex) { 1945 1946 // shut down index to remove potential config files of Solr indexes 1947 searchIndex.shutDown(); 1948 if (searchIndex instanceof CmsSolrIndex) { 1949 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 1950 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 1951 } 1952 m_indexes.remove(searchIndex); 1953 initOfflineIndexes(); 1954 1955 if (LOG.isInfoEnabled()) { 1956 LOG.info( 1957 Messages.get().getBundle().key( 1958 Messages.LOG_REMOVE_SEARCH_INDEX_2, 1959 searchIndex.getName(), 1960 searchIndex.getProject())); 1961 } 1962 } 1963 1964 /** 1965 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 1966 * 1967 * @param indexNames the names of the index to remove 1968 */ 1969 public void removeSearchIndexes(List<String> indexNames) { 1970 1971 Iterator<String> i = indexNames.iterator(); 1972 while (i.hasNext()) { 1973 String indexName = i.next(); 1974 // get the search index by name 1975 CmsSearchIndex index = getIndex(indexName); 1976 if (index != null) { 1977 // remove the index 1978 removeSearchIndex(index); 1979 } else { 1980 if (LOG.isWarnEnabled()) { 1981 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1982 } 1983 } 1984 } 1985 } 1986 1987 /** 1988 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 1989 * 1990 * @param indexsource the indexsource to remove from the configuration 1991 * 1992 * @return true if remove was successful, false if preconditions for removal are ok but the given 1993 * searchindex was unknown to the manager. 1994 * 1995 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 1996 * <code>{@link CmsSearchIndex}</code>. 1997 * 1998 */ 1999 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2000 2001 // validation if removal will be granted 2002 Iterator<CmsSearchIndex> itIndexes = m_indexes.iterator(); 2003 CmsSearchIndex idx; 2004 // the list for collecting indexes that use the given index source 2005 List<CmsSearchIndex> referrers = new ArrayList<CmsSearchIndex>(); 2006 // the current list of referred index sources of the iterated index 2007 List<CmsSearchIndexSource> refsources; 2008 while (itIndexes.hasNext()) { 2009 idx = itIndexes.next(); 2010 refsources = idx.getSources(); 2011 if (refsources != null) { 2012 if (refsources.contains(indexsource)) { 2013 referrers.add(idx); 2014 } 2015 } 2016 } 2017 if (referrers.size() > 0) { 2018 throw new CmsIllegalStateException( 2019 Messages.get().container( 2020 Messages.ERR_INDEX_SOURCE_DELETE_2, 2021 indexsource.getName(), 2022 referrers.toString())); 2023 } 2024 2025 // remove operation (no exception) 2026 return m_indexSources.remove(indexsource.getName()) != null; 2027 2028 } 2029 2030 /** 2031 * Resumes offline indexing if it was paused.<p> 2032 */ 2033 public void resumeOfflineIndexing() { 2034 2035 if (m_offlineUpdateFrequency == Long.MAX_VALUE) { 2036 setOfflineUpdateFrequency( 2037 m_configuredOfflineIndexingFrequency > 0 2038 ? m_configuredOfflineIndexingFrequency 2039 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2040 } 2041 } 2042 2043 /** 2044 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2045 * 2046 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2047 */ 2048 public void setDirectory(String value) { 2049 2050 m_path = value; 2051 } 2052 2053 /** 2054 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2055 * 2056 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2057 */ 2058 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2059 2060 m_extractionCacheMaxAge = extractionCacheMaxAge; 2061 } 2062 2063 /** 2064 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2065 * 2066 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2067 */ 2068 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2069 2070 try { 2071 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2072 } catch (NumberFormatException e) { 2073 LOG.error( 2074 Messages.get().getBundle().key( 2075 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2076 extractionCacheMaxAge, 2077 new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2078 e); 2079 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2080 } 2081 } 2082 2083 /** 2084 * Sets the unlock mode during indexing.<p> 2085 * 2086 * @param value the value 2087 */ 2088 public void setForceunlock(String value) { 2089 2090 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2091 } 2092 2093 /** 2094 * Sets the highlighter.<p> 2095 * 2096 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2097 * 2098 * @param highlighter the package/class name of the highlighter 2099 */ 2100 public void setHighlighter(String highlighter) { 2101 2102 try { 2103 m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2104 } catch (Exception e) { 2105 m_highlighter = null; 2106 LOG.error(e.getLocalizedMessage(), e); 2107 } 2108 } 2109 2110 /** 2111 * Sets the seconds to wait for an index lock during an update operation.<p> 2112 * 2113 * @param value the seconds to wait for an index lock during an update operation 2114 */ 2115 public void setIndexLockMaxWaitSeconds(int value) { 2116 2117 m_indexLockMaxWaitSeconds = value; 2118 } 2119 2120 /** 2121 * Sets the max. excerpt length.<p> 2122 * 2123 * @param maxExcerptLength the max. excerpt length to set 2124 */ 2125 public void setMaxExcerptLength(int maxExcerptLength) { 2126 2127 m_maxExcerptLength = maxExcerptLength; 2128 } 2129 2130 /** 2131 * Sets the max. excerpt length as a String.<p> 2132 * 2133 * @param maxExcerptLength the max. excerpt length to set 2134 */ 2135 public void setMaxExcerptLength(String maxExcerptLength) { 2136 2137 try { 2138 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2139 } catch (Exception e) { 2140 LOG.error( 2141 Messages.get().getBundle().key( 2142 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2143 maxExcerptLength, 2144 new Integer(DEFAULT_EXCERPT_LENGTH)), 2145 e); 2146 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2147 } 2148 } 2149 2150 /** 2151 * Sets the maximal wait time for offline index updates after edit operations.<p> 2152 * 2153 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2154 */ 2155 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2156 2157 m_maxIndexWaitTime = maxIndexWaitTime; 2158 } 2159 2160 /** 2161 * Sets the maximal wait time for offline index updates after edit operations.<p> 2162 * 2163 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2164 */ 2165 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2166 2167 try { 2168 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2169 } catch (Exception e) { 2170 LOG.error( 2171 Messages.get().getBundle().key( 2172 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2173 maxIndexWaitTime, 2174 new Long(DEFAULT_MAX_INDEX_WAITTIME)), 2175 e); 2176 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2177 } 2178 } 2179 2180 /** 2181 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2182 * 2183 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2184 */ 2185 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2186 2187 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2188 } 2189 2190 /** 2191 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2192 * 2193 * @param value the maximum number of modifications to set 2194 */ 2195 public void setMaxModificationsBeforeCommit(String value) { 2196 2197 try { 2198 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2199 } catch (Exception e) { 2200 LOG.error( 2201 Messages.get().getBundle().key( 2202 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2203 value, 2204 new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2205 e); 2206 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2207 } 2208 } 2209 2210 /** 2211 * Sets the update frequency of the offline indexer in milliseconds.<p> 2212 * 2213 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2214 */ 2215 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2216 2217 m_offlineUpdateFrequency = offlineUpdateFrequency; 2218 updateOfflineIndexes(0); 2219 } 2220 2221 /** 2222 * Sets the update frequency of the offline indexer in milliseconds.<p> 2223 * 2224 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2225 */ 2226 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2227 2228 try { 2229 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2230 } catch (Exception e) { 2231 LOG.error( 2232 Messages.get().getBundle().key( 2233 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2234 offlineUpdateFrequency, 2235 new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2236 e); 2237 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2238 } 2239 } 2240 2241 /** 2242 * Sets the Solr configuration.<p> 2243 * 2244 * @param config the Solr configuration 2245 */ 2246 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2247 2248 m_solrConfig = config; 2249 } 2250 2251 /** 2252 * Sets the timeout to abandon threads indexing a resource.<p> 2253 * 2254 * @param value the timeout in milliseconds 2255 */ 2256 public void setTimeout(long value) { 2257 2258 m_timeout = value; 2259 } 2260 2261 /** 2262 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2263 * 2264 * @param value the timeout in milliseconds 2265 */ 2266 public void setTimeout(String value) { 2267 2268 try { 2269 setTimeout(Long.parseLong(value)); 2270 } catch (Exception e) { 2271 LOG.error( 2272 Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)), 2273 e); 2274 setTimeout(DEFAULT_TIMEOUT); 2275 } 2276 } 2277 2278 /** 2279 * Shuts down the search manager.<p> 2280 * 2281 * This will cause all search indices to be shut down.<p> 2282 */ 2283 public void shutDown() { 2284 2285 if (m_offlineIndexThread != null) { 2286 m_offlineIndexThread.shutDown(); 2287 } 2288 2289 if (m_offlineHandler != null) { 2290 OpenCms.removeCmsEventListener(m_offlineHandler); 2291 } 2292 2293 Iterator<CmsSearchIndex> i = m_indexes.iterator(); 2294 while (i.hasNext()) { 2295 CmsSearchIndex index = i.next(); 2296 index.shutDown(); 2297 index = null; 2298 } 2299 m_indexes.clear(); 2300 2301 shutDownSolrContainer(); 2302 2303 if (CmsLog.INIT.isInfoEnabled()) { 2304 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2305 } 2306 } 2307 2308 /** 2309 * Updates all offline indexes.<p> 2310 * 2311 * Can be used to force an index update when it's not convenient to wait until the 2312 * offline update interval has eclipsed.<p> 2313 * 2314 * Since the offline indexes still need some time to update the new resources, 2315 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2316 * to ensure that updating is finished. 2317 * 2318 * @see #updateOfflineIndexes(long) 2319 * 2320 */ 2321 public void updateOfflineIndexes() { 2322 2323 updateOfflineIndexes(getMaxIndexWaitTime()); 2324 } 2325 2326 /** 2327 * Updates all offline indexes.<p> 2328 * 2329 * Can be used to force an index update when it's not convenient to wait until the 2330 * offline update interval has eclipsed.<p> 2331 * 2332 * Since the offline index will still need some time to update the new resources even if it runs directly, 2333 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2334 * 2335 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2336 */ 2337 public void updateOfflineIndexes(long waitTime) { 2338 2339 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2340 // notify existing thread of update frequency change 2341 if (LOG.isDebugEnabled()) { 2342 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2343 } 2344 m_offlineIndexThread.interrupt(); 2345 if (waitTime > 0) { 2346 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2347 } 2348 } 2349 } 2350 2351 /** 2352 * Cleans up the extraction result cache.<p> 2353 */ 2354 protected void cleanExtractionCache() { 2355 2356 // clean up the extraction result cache 2357 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2358 } 2359 2360 /** 2361 * Collects the related containerpages to the resources that have been published.<p> 2362 * 2363 * @param adminCms an OpenCms user context with Admin permissions 2364 * @param updateResources the resources to be re-indexed 2365 * 2366 * @return the updated list of resource to re-index 2367 */ 2368 protected List<CmsPublishedResource> findRelatedContainerPages( 2369 CmsObject adminCms, 2370 List<CmsPublishedResource> updateResources) { 2371 2372 Set<CmsResource> elementGroups = new HashSet<CmsResource>(); 2373 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2374 int containerPageTypeId = -1; 2375 try { 2376 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2377 } catch (CmsLoaderException e) { 2378 // will happen during setup, when container page type is not available yet 2379 LOG.info(e.getLocalizedMessage(), e); 2380 } 2381 if (containerPageTypeId != -1) { 2382 for (CmsPublishedResource pubRes : updateResources) { 2383 try { 2384 if (OpenCms.getResourceManager().getResourceType( 2385 pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2386 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2387 filter.filterStrong(); 2388 List<CmsRelation> relations = adminCms.readRelations(filter); 2389 for (CmsRelation relation : relations) { 2390 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2391 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2392 containerPages.add(res); 2393 if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) { 2394 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2395 } 2396 } else if (OpenCms.getResourceManager().getResourceType( 2397 res.getTypeId()).getTypeName().equals( 2398 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) { 2399 elementGroups.add(res); 2400 } 2401 } 2402 } 2403 if (containerPageTypeId == pubRes.getType()) { 2404 addDetailContent( 2405 adminCms, 2406 containerPages, 2407 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2408 } 2409 } catch (CmsException e) { 2410 LOG.error(e.getLocalizedMessage(), e); 2411 } 2412 } 2413 for (CmsResource pubRes : elementGroups) { 2414 try { 2415 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2416 filter.filterStrong(); 2417 List<CmsRelation> relations = adminCms.readRelations(filter); 2418 for (CmsRelation relation : relations) { 2419 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2420 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2421 containerPages.add(res); 2422 if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) { 2423 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2424 } 2425 } 2426 } 2427 } catch (CmsException e) { 2428 LOG.error(e.getLocalizedMessage(), e); 2429 } 2430 } 2431 // add all found container pages as published resource objects to the list 2432 for (CmsResource page : containerPages) { 2433 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2434 if (!updateResources.contains(pubCont)) { 2435 // ensure container page is added only once 2436 updateResources.add(pubCont); 2437 } 2438 } 2439 } 2440 return updateResources; 2441 } 2442 2443 /** 2444 * Returns the set of names of all configured document types.<p> 2445 * 2446 * @return the set of names of all configured document types 2447 */ 2448 protected List<String> getDocumentTypes() { 2449 2450 List<String> names = new ArrayList<String>(); 2451 for (Iterator<I_CmsDocumentFactory> i = m_documentTypes.values().iterator(); i.hasNext();) { 2452 I_CmsDocumentFactory factory = i.next(); 2453 names.add(factory.getName()); 2454 } 2455 return names; 2456 } 2457 2458 /** 2459 * Returns the a offline project used for offline indexing.<p> 2460 * 2461 * @return the offline project if available 2462 */ 2463 protected CmsProject getOfflineIndexProject() { 2464 2465 CmsProject result = null; 2466 for (CmsSearchIndex index : m_offlineIndexes) { 2467 try { 2468 result = m_adminCms.readProject(index.getProject()); 2469 2470 if (!result.isOnlineProject()) { 2471 break; 2472 } 2473 } catch (Exception e) { 2474 // may be a missconfigured index, ignore 2475 LOG.error(e.getLocalizedMessage(), e); 2476 } 2477 } 2478 return result; 2479 } 2480 2481 /** 2482 * Returns a new thread manager for the indexing threads.<p> 2483 * 2484 * @return a new thread manager for the indexing threads 2485 */ 2486 protected CmsIndexingThreadManager getThreadManager() { 2487 2488 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 2489 } 2490 2491 /** 2492 * Initializes the available Cms resource types to be indexed.<p> 2493 * 2494 * A map stores document factories keyed by a string representing 2495 * a colon separated list of Cms resource types and/or mimetypes.<p> 2496 * 2497 * The keys of this map are used to trigger a document factory to convert 2498 * a Cms resource into a Lucene index document.<p> 2499 * 2500 * A document factory is a class implementing the interface 2501 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 2502 */ 2503 protected void initAvailableDocumentTypes() { 2504 2505 CmsSearchDocumentType documenttype = null; 2506 String className = null; 2507 String name = null; 2508 I_CmsDocumentFactory documentFactory = null; 2509 List<String> resourceTypes = null; 2510 List<String> mimeTypes = null; 2511 Class<?> c = null; 2512 2513 m_documentTypes = new HashMap<String, I_CmsDocumentFactory>(); 2514 2515 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 2516 2517 documenttype = m_documentTypeConfigs.get(i); 2518 name = documenttype.getName(); 2519 2520 try { 2521 className = documenttype.getClassName(); 2522 resourceTypes = documenttype.getResourceTypes(); 2523 mimeTypes = documenttype.getMimeTypes(); 2524 2525 if (name == null) { 2526 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 2527 } 2528 if (className == null) { 2529 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 2530 } 2531 if (resourceTypes.size() == 0) { 2532 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 2533 } 2534 2535 try { 2536 c = Class.forName(className); 2537 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 2538 new Object[] {name}); 2539 } catch (ClassNotFoundException exc) { 2540 throw new CmsIndexException( 2541 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 2542 exc); 2543 } catch (Exception exc) { 2544 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 2545 } 2546 2547 if (documentFactory.isUsingCache()) { 2548 // init cache if used by the factory 2549 documentFactory.setCache(m_extractionResultCache); 2550 } 2551 2552 for (Iterator<String> key = documentFactory.getDocumentKeys( 2553 resourceTypes, 2554 mimeTypes).iterator(); key.hasNext();) { 2555 m_documentTypes.put(key.next(), documentFactory); 2556 } 2557 2558 } catch (CmsException e) { 2559 if (LOG.isWarnEnabled()) { 2560 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 2561 } 2562 } 2563 } 2564 } 2565 2566 /** 2567 * Initializes the configured search indexes.<p> 2568 * 2569 * This initializes also the list of Cms resources types 2570 * to be indexed by an index source.<p> 2571 */ 2572 protected void initSearchIndexes() { 2573 2574 CmsSearchIndex index = null; 2575 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2576 index = m_indexes.get(i); 2577 // reset disabled flag 2578 index.setEnabled(true); 2579 // check if the index has been configured correctly 2580 if (index.checkConfiguration(m_adminCms)) { 2581 // the index is configured correctly 2582 try { 2583 index.initialize(); 2584 } catch (Exception e) { 2585 if (CmsLog.INIT.isWarnEnabled()) { 2586 // in this case the index will be disabled 2587 CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 2588 } 2589 } 2590 } 2591 // output a log message if the index was successfully configured or not 2592 if (CmsLog.INIT.isInfoEnabled()) { 2593 if (index.isEnabled()) { 2594 CmsLog.INIT.info( 2595 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 2596 } else { 2597 CmsLog.INIT.warn( 2598 Messages.get().getBundle().key( 2599 Messages.INIT_INDEX_NOT_CONFIGURED_2, 2600 index, 2601 index.getProject())); 2602 } 2603 } 2604 } 2605 } 2606 2607 /** 2608 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 2609 * after resources have been published.<p> 2610 * 2611 * @param adminCms an OpenCms user context with Admin permissions 2612 * @param publishHistoryId the history ID of the published project 2613 * @param report the report to write the output to 2614 */ 2615 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 2616 2617 int oldPriority = Thread.currentThread().getPriority(); 2618 try { 2619 SEARCH_MANAGER_LOCK.lock(); 2620 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 2621 List<CmsPublishedResource> publishedResources; 2622 try { 2623 // read the list of all published resources 2624 publishedResources = adminCms.readPublishedResources(publishHistoryId); 2625 } catch (CmsException e) { 2626 LOG.error( 2627 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 2628 e); 2629 return; 2630 } 2631 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 2632 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 2633 2634 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 2635 for (CmsPublishedResource res : publishedResources) { 2636 if (res.isFolder() || res.getState().isUnchanged()) { 2637 // folders and unchanged resources don't need to be indexed after publish 2638 continue; 2639 } 2640 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 2641 if (updateResources.contains(res)) { 2642 // resource may have been added as a sibling of another resource 2643 // in this case we make sure to use the value from the publish list because of the "deleted" flag 2644 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 2645 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 2646 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 2647 // check it this is a moved resource with source / target info, in this case we need both entries 2648 if (!hasMoved) { 2649 // if the resource was moved, we must contain both entries 2650 updateResources.remove(res); 2651 } 2652 // "equals()" implementation of published resource checks for id, 2653 // so the removed value may have a different "deleted" or "modified" status value 2654 updateResources.add(res); 2655 } else { 2656 // resource not yet contained in the list 2657 updateResources.add(res); 2658 // check for the siblings (not for deleted resources, these are already gone) 2659 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 2660 // this resource has siblings 2661 try { 2662 // read siblings from the online project 2663 List<CmsResource> siblings = adminCms.readSiblings( 2664 res.getRootPath(), 2665 CmsResourceFilter.ALL); 2666 Iterator<CmsResource> itSib = siblings.iterator(); 2667 while (itSib.hasNext()) { 2668 // check all siblings 2669 CmsResource sibling = itSib.next(); 2670 CmsPublishedResource sib = new CmsPublishedResource(sibling); 2671 if (!updateResources.contains(sib)) { 2672 // ensure sibling is added only once 2673 updateResources.add(sib); 2674 } 2675 } 2676 } catch (CmsException e) { 2677 // ignore, just use the original resource 2678 if (LOG.isWarnEnabled()) { 2679 LOG.warn( 2680 Messages.get().getBundle().key( 2681 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 2682 res.getRootPath()), 2683 e); 2684 } 2685 } 2686 } 2687 } 2688 } 2689 } 2690 2691 findRelatedContainerPages(adminCms, updateResources); 2692 if (!updateResources.isEmpty()) { 2693 // sort the resource to update 2694 Collections.sort(updateResources); 2695 // only update the indexes if the list of remaining published resources is not empty 2696 Iterator<CmsSearchIndex> i = m_indexes.iterator(); 2697 while (i.hasNext()) { 2698 CmsSearchIndex index = i.next(); 2699 if (CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 2700 // only update indexes which have the rebuild mode set to "auto" 2701 try { 2702 updateIndex(index, report, updateResources); 2703 } catch (CmsException e) { 2704 LOG.error( 2705 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 2706 e); 2707 } 2708 } 2709 } 2710 } 2711 // clean up the extraction result cache 2712 cleanExtractionCache(); 2713 } finally { 2714 SEARCH_MANAGER_LOCK.unlock(); 2715 Thread.currentThread().setPriority(oldPriority); 2716 } 2717 } 2718 2719 /** 2720 * Updates (if required creates) the index with the given name.<p> 2721 * 2722 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 2723 * incrementally updated for these resources only. If this List is <code>null</code> or empty, 2724 * the index will be fully rebuild.<p> 2725 * 2726 * @param index the index to update or rebuild 2727 * @param report the report to write output messages to 2728 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 2729 * 2730 * @throws CmsException if something goes wrong 2731 */ 2732 protected void updateIndex(CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 2733 throws CmsException { 2734 2735 try { 2736 SEARCH_MANAGER_LOCK.lock(); 2737 2738 // copy the stored admin context for the indexing 2739 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 2740 // make sure a report is available 2741 if (report == null) { 2742 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 2743 } 2744 2745 // check if the index has been configured correctly 2746 if (!index.checkConfiguration(cms)) { 2747 // the index is disabled 2748 return; 2749 } 2750 2751 // set site root and project for this index 2752 cms.getRequestContext().setSiteRoot("/"); 2753 // switch to the index project 2754 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 2755 2756 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 2757 // rebuild the complete index 2758 2759 // create a new thread manager for the indexing threads 2760 CmsIndexingThreadManager threadManager = getThreadManager(); 2761 2762 boolean isOfflineIndex = false; 2763 if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 2764 // disable offline indexing while the complete index is rebuild 2765 isOfflineIndex = true; 2766 index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_MANUAL); 2767 // re-initialize the offline indexes, this will disable this offline index 2768 initOfflineIndexes(); 2769 } 2770 2771 I_CmsIndexWriter writer = null; 2772 try { 2773 // create a backup of the existing index 2774 String backup = index.createIndexBackup(); 2775 if (backup != null) { 2776 index.indexSearcherOpen(backup); 2777 } 2778 2779 // create a new index writer 2780 writer = index.getIndexWriter(report, true); 2781 if (writer instanceof CmsSolrIndexWriter) { 2782 try { 2783 ((CmsSolrIndexWriter)writer).deleteAllDocuments(); 2784 } catch (IOException e) { 2785 LOG.error(e.getMessage(), e); 2786 } 2787 } 2788 2789 // output start information on the report 2790 report.println( 2791 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), 2792 I_CmsReport.FORMAT_HEADLINE); 2793 2794 // iterate all configured index sources of this index 2795 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 2796 while (sources.hasNext()) { 2797 // get the next index source 2798 CmsSearchIndexSource source = sources.next(); 2799 // create the indexer 2800 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 2801 // new index creation, use all resources from the index source 2802 indexer.rebuildIndex(writer, threadManager, source); 2803 2804 // wait for indexing threads to finish 2805 while (threadManager.isRunning()) { 2806 try { 2807 Thread.sleep(500); 2808 } catch (InterruptedException e) { 2809 // just continue with the loop after interruption 2810 LOG.info(e.getLocalizedMessage(), e); 2811 } 2812 } 2813 2814 // commit and optimize the index after each index source has been finished 2815 try { 2816 writer.commit(); 2817 } catch (IOException e) { 2818 if (LOG.isWarnEnabled()) { 2819 LOG.warn( 2820 Messages.get().getBundle().key( 2821 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 2822 index.getName(), 2823 index.getPath()), 2824 e); 2825 } 2826 } 2827 try { 2828 writer.optimize(); 2829 } catch (IOException e) { 2830 if (LOG.isWarnEnabled()) { 2831 LOG.warn( 2832 Messages.get().getBundle().key( 2833 Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, 2834 index.getName(), 2835 index.getPath()), 2836 e); 2837 } 2838 } 2839 } 2840 2841 if (backup != null) { 2842 // remove the backup after the files have been re-indexed 2843 index.indexSearcherClose(); 2844 index.removeIndexBackup(backup); 2845 } 2846 2847 // output finish information on the report 2848 report.println( 2849 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), 2850 I_CmsReport.FORMAT_HEADLINE); 2851 2852 } finally { 2853 if (writer != null) { 2854 try { 2855 writer.close(); 2856 } catch (IOException e) { 2857 if (LOG.isWarnEnabled()) { 2858 LOG.warn( 2859 Messages.get().getBundle().key( 2860 Messages.LOG_IO_INDEX_WRITER_CLOSE_2, 2861 index.getPath(), 2862 index.getName()), 2863 e); 2864 } 2865 } 2866 } 2867 if (isOfflineIndex) { 2868 // reset the mode of the offline index 2869 index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_OFFLINE); 2870 // re-initialize the offline indexes, this will re-enable this index 2871 initOfflineIndexes(); 2872 } 2873 // index has changed - initialize the index searcher instance 2874 index.indexSearcherOpen(index.getPath()); 2875 } 2876 2877 // show information about indexing runtime 2878 threadManager.reportStatistics(report); 2879 2880 } else { 2881 updateIndexIncremental(cms, index, report, resourcesToIndex); 2882 } 2883 } finally { 2884 SEARCH_MANAGER_LOCK.unlock(); 2885 } 2886 } 2887 2888 /** 2889 * Incrementally updates the given index.<p> 2890 * 2891 * @param cms the OpenCms user context to use for accessing the VFS 2892 * @param index the index to update 2893 * @param report the report to write output messages to 2894 * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index 2895 * 2896 * @throws CmsException if something goes wrong 2897 */ 2898 protected void updateIndexIncremental( 2899 CmsObject cms, 2900 CmsSearchIndex index, 2901 I_CmsReport report, 2902 List<CmsPublishedResource> resourcesToIndex) 2903 throws CmsException { 2904 2905 try { 2906 SEARCH_MANAGER_LOCK.lock(); 2907 2908 // update the existing index 2909 List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>(); 2910 2911 boolean hasResourcesToDelete = false; 2912 boolean hasResourcesToUpdate = false; 2913 2914 // iterate all configured index sources of this index 2915 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 2916 while (sources.hasNext()) { 2917 // get the next index source 2918 CmsSearchIndexSource source = sources.next(); 2919 // create the indexer 2920 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 2921 // collect the resources to update 2922 CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); 2923 if (!updateData.isEmpty()) { 2924 // add the update collection to the internal pipeline 2925 updateCollections.add(updateData); 2926 hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 2927 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 2928 } 2929 } 2930 2931 // only start index modification if required 2932 if (hasResourcesToDelete || hasResourcesToUpdate) { 2933 // output start information on the report 2934 report.println( 2935 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 2936 I_CmsReport.FORMAT_HEADLINE); 2937 2938 I_CmsIndexWriter writer = null; 2939 try { 2940 // obtain an index writer that updates the current index 2941 writer = index.getIndexWriter(report, false); 2942 2943 if (hasResourcesToDelete) { 2944 // delete the resource from the index 2945 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 2946 while (i.hasNext()) { 2947 CmsSearchIndexUpdateData updateCollection = i.next(); 2948 if (updateCollection.hasResourcesToDelete()) { 2949 updateCollection.getIndexer().deleteResources( 2950 writer, 2951 updateCollection.getResourcesToDelete()); 2952 } 2953 } 2954 } 2955 2956 if (hasResourcesToUpdate) { 2957 // create a new thread manager 2958 CmsIndexingThreadManager threadManager = getThreadManager(); 2959 2960 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 2961 while (i.hasNext()) { 2962 CmsSearchIndexUpdateData updateCollection = i.next(); 2963 if (updateCollection.hasResourceToUpdate()) { 2964 updateCollection.getIndexer().updateResources( 2965 writer, 2966 threadManager, 2967 updateCollection.getResourcesToUpdate()); 2968 } 2969 } 2970 2971 // wait for indexing threads to finish 2972 while (threadManager.isRunning()) { 2973 try { 2974 Thread.sleep(500); 2975 } catch (InterruptedException e) { 2976 // just continue with the loop after interruption 2977 LOG.info(e.getLocalizedMessage(), e); 2978 } 2979 } 2980 } 2981 } finally { 2982 // close the index writer 2983 if (writer != null) { 2984 try { 2985 writer.commit(); 2986 } catch (IOException e) { 2987 LOG.error( 2988 Messages.get().getBundle().key( 2989 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 2990 index.getName(), 2991 index.getPath()), 2992 e); 2993 } 2994 } 2995 // index has changed - initialize the index searcher instance 2996 index.indexSearcherUpdate(); 2997 } 2998 2999 // output finish information on the report 3000 report.println( 3001 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3002 I_CmsReport.FORMAT_HEADLINE); 3003 } 3004 } finally { 3005 SEARCH_MANAGER_LOCK.unlock(); 3006 } 3007 } 3008 3009 /** 3010 * Updates the offline search indexes for the given list of resources.<p> 3011 * 3012 * @param report the report to write the index information to 3013 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3014 */ 3015 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3016 3017 CmsObject cms = m_adminCms; 3018 try { 3019 // copy the administration context for the indexing 3020 cms = OpenCms.initCmsObject(m_adminCms); 3021 // set site root and project for this index 3022 cms.getRequestContext().setSiteRoot("/"); 3023 } catch (CmsException e) { 3024 LOG.error(e.getLocalizedMessage(), e); 3025 } 3026 3027 Iterator<CmsSearchIndex> j = m_offlineIndexes.iterator(); 3028 while (j.hasNext()) { 3029 CmsSearchIndex index = j.next(); 3030 if (index.getSources() != null) { 3031 try { 3032 // switch to the index project 3033 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3034 updateIndexIncremental(cms, index, report, resourcesToIndex); 3035 } catch (CmsException e) { 3036 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3037 } 3038 } 3039 } 3040 } 3041 3042 /** 3043 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3044 * 3045 * @param adminCms the cms context 3046 * @param containerPages the containerpages 3047 * @param containerPage the container page site path 3048 */ 3049 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3050 3051 if (CmsJspTagContainer.isDetailContainersPage(adminCms, containerPage)) { 3052 3053 try { 3054 CmsResource detailRes = adminCms.readResource( 3055 CmsJspTagContainer.getDetailContentPath(containerPage), 3056 CmsResourceFilter.IGNORE_EXPIRATION); 3057 containerPages.add(detailRes); 3058 } catch (Throwable e) { 3059 if (LOG.isWarnEnabled()) { 3060 LOG.warn(e.getLocalizedMessage(), e); 3061 } 3062 } 3063 } 3064 } 3065 3066 /** 3067 * Creates the Solr core container.<p> 3068 * 3069 * @return the created core container 3070 */ 3071 private CoreContainer createCoreContainer() { 3072 3073 CoreContainer container = null; 3074 try { 3075 // get the core container 3076 // still no core container: create it 3077 container = CoreContainer.createAndLoad( 3078 Paths.get(m_solrConfig.getHome()), 3079 m_solrConfig.getSolrFile().toPath()); 3080 if (CmsLog.INIT.isInfoEnabled()) { 3081 CmsLog.INIT.info( 3082 Messages.get().getBundle().key( 3083 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3084 m_solrConfig.getHome(), 3085 m_solrConfig.getSolrFile().getName())); 3086 } 3087 } catch (Exception e) { 3088 LOG.error( 3089 Messages.get().getBundle().key( 3090 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3091 m_solrConfig.getSolrFile().getAbsolutePath()), 3092 e); 3093 } 3094 return container; 3095 3096 } 3097 3098 /** 3099 * Returns the report in the given event data, if <code>null</code> 3100 * a new log report is used.<p> 3101 * 3102 * @param event the event to get the report for 3103 * 3104 * @return the report 3105 */ 3106 private I_CmsReport getEventReport(CmsEvent event) { 3107 3108 I_CmsReport report = null; 3109 if (event.getData() != null) { 3110 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3111 } 3112 if (report == null) { 3113 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3114 } 3115 return report; 3116 } 3117 3118 /** 3119 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3120 * 3121 * @param publishedResources a list of published resources 3122 * 3123 * @return the set of structure ids that satisfy the condition above 3124 */ 3125 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3126 List<CmsPublishedResource> publishedResources) { 3127 3128 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3129 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3130 for (CmsPublishedResource pubRes : publishedResources) { 3131 if (pubRes.getState().isNew()) { 3132 result.add(pubRes.getStructureId()); 3133 } 3134 if (pubRes.getState().isDeleted()) { 3135 deletedSet.add(pubRes.getStructureId()); 3136 } 3137 } 3138 result.retainAll(deletedSet); 3139 return result; 3140 } 3141 3142 /** 3143 * Shuts down the Solr core container.<p> 3144 */ 3145 private void shutDownSolrContainer() { 3146 3147 if (m_coreContainer != null) { 3148 for (SolrCore core : m_coreContainer.getCores()) { 3149 // do not unload spellcheck core because otherwise the core.properties file is removed 3150 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3151 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3152 m_coreContainer.unload(core.getName(), false, false, true); 3153 } 3154 } 3155 m_coreContainer.shutdown(); 3156 if (CmsLog.INIT.isInfoEnabled()) { 3157 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3158 } 3159 m_coreContainer = null; 3160 } 3161 } 3162 3163}