001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
031import org.opencms.configuration.CmsConfigurationException;
032import org.opencms.db.CmsDriverManager;
033import org.opencms.db.CmsPublishedResource;
034import org.opencms.db.CmsResourceState;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsProject;
037import org.opencms.file.CmsResource;
038import org.opencms.file.CmsResourceFilter;
039import org.opencms.file.CmsUser;
040import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
041import org.opencms.file.types.CmsResourceTypeXmlContent;
042import org.opencms.file.types.I_CmsResourceType;
043import org.opencms.i18n.CmsLocaleManager;
044import org.opencms.i18n.CmsMessageContainer;
045import org.opencms.loader.CmsLoaderException;
046import org.opencms.main.CmsBroadcast.ContentMode;
047import org.opencms.main.CmsEvent;
048import org.opencms.main.CmsException;
049import org.opencms.main.CmsIllegalArgumentException;
050import org.opencms.main.CmsIllegalStateException;
051import org.opencms.main.CmsLog;
052import org.opencms.main.I_CmsEventListener;
053import org.opencms.main.OpenCms;
054import org.opencms.main.OpenCmsSolrHandler;
055import org.opencms.relations.CmsRelation;
056import org.opencms.relations.CmsRelationFilter;
057import org.opencms.relations.CmsRelationType;
058import org.opencms.report.CmsLogReport;
059import org.opencms.report.CmsShellLogReport;
060import org.opencms.report.I_CmsReport;
061import org.opencms.scheduler.I_CmsScheduledJob;
062import org.opencms.search.documents.A_CmsVfsDocument;
063import org.opencms.search.documents.CmsExtractionResultCache;
064import org.opencms.search.documents.I_CmsDocumentFactory;
065import org.opencms.search.documents.I_CmsTermHighlighter;
066import org.opencms.search.fields.CmsLuceneField;
067import org.opencms.search.fields.CmsLuceneFieldConfiguration;
068import org.opencms.search.fields.CmsSearchField;
069import org.opencms.search.fields.CmsSearchFieldConfiguration;
070import org.opencms.search.fields.CmsSearchFieldMapping;
071import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
072import org.opencms.search.solr.CmsSolrConfiguration;
073import org.opencms.search.solr.CmsSolrFieldConfiguration;
074import org.opencms.search.solr.CmsSolrIndex;
075import org.opencms.search.solr.I_CmsSolrIndexWriter;
076import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
077import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer;
078import org.opencms.security.CmsRole;
079import org.opencms.security.CmsRoleViolationException;
080import org.opencms.util.A_CmsModeStringEnumeration;
081import org.opencms.util.CmsFileUtil;
082import org.opencms.util.CmsStringUtil;
083import org.opencms.util.CmsUUID;
084import org.opencms.util.CmsWaitHandle;
085
086import java.io.File;
087import java.io.IOException;
088import java.nio.file.FileSystems;
089import java.nio.file.Paths;
090import java.util.ArrayList;
091import java.util.Collection;
092import java.util.Collections;
093import java.util.HashMap;
094import java.util.HashSet;
095import java.util.Iterator;
096import java.util.LinkedHashMap;
097import java.util.List;
098import java.util.ListIterator;
099import java.util.Locale;
100import java.util.Map;
101import java.util.Set;
102import java.util.TreeMap;
103import java.util.concurrent.locks.ReentrantLock;
104import java.util.stream.Collectors;
105
106import org.apache.commons.logging.Log;
107import org.apache.lucene.analysis.Analyzer;
108import org.apache.lucene.analysis.CharArraySet;
109import org.apache.lucene.analysis.standard.StandardAnalyzer;
110import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
111import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
112import org.apache.solr.core.CoreContainer;
113import org.apache.solr.core.CoreDescriptor;
114import org.apache.solr.core.SolrCore;
115
116/**
117 * Implements the general management and configuration of the search and
118 * indexing facilities in OpenCms.<p>
119 *
120 * @since 6.0.0
121 */
122public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
123
124    /**
125     *  Enumeration class for force unlock types.<p>
126     */
127    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
128
129        /** Force unlock type "always". */
130        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
131
132        /** Force unlock type "never". */
133        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
134
135        /** Force unlock type "only full". */
136        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
137
138        /** Serializable version id. */
139        private static final long serialVersionUID = 74746076708908673L;
140
141        /**
142         * Creates a new force unlock type with the given name.<p>
143         *
144         * @param mode the mode id to use
145         */
146        protected CmsSearchForceUnlockMode(String mode) {
147
148            super(mode);
149        }
150
151        /**
152         * Returns the lock type for the given type value.<p>
153         *
154         * @param type the type value to get the lock type for
155         *
156         * @return the lock type for the given type value
157         */
158        public static CmsSearchForceUnlockMode valueOf(String type) {
159
160            if (type.equals(ALWAYS.toString())) {
161                return ALWAYS;
162            } else if (type.equals(NEVER.toString())) {
163                return NEVER;
164            } else {
165                return ONLYFULL;
166            }
167        }
168    }
169
170    /**
171     * Handles offline index generation.<p>
172     */
173    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
174
175        /** Indicates if the event handlers for the offline search have been already registered. */
176        private boolean m_isEventRegistered;
177
178        /** The list of resources to index. */
179        private List<CmsPublishedResource> m_resourcesToIndex;
180
181        /**
182         * Initializes the offline index handler.<p>
183         */
184        protected CmsSearchOfflineHandler() {
185
186            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
187        }
188
189        /**
190         * Implements the event listener of this class.<p>
191         *
192         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
193         */
194        @SuppressWarnings("unchecked")
195        public void cmsEvent(CmsEvent event) {
196
197            switch (event.getType()) {
198                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
199                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
200                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
201                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
202                    Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
203                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
204                        // skip lock & unlock
205                        return;
206                    }
207                    // skip indexing if flag is set in event
208                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
209                    if (skip != null) {
210                        return;
211                    }
212
213                    // a resource has been modified - offline indexes require (re)indexing
214                    List<CmsResource> resources = Collections.singletonList(
215                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
216                    reIndexResources(resources);
217                    break;
218                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
219                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
220                        I_CmsEventListener.KEY_RESOURCES);
221                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
222                    for (CmsResource res : resourcesToDelete) {
223                        if (res.getState().isNew()) {
224                            // if the resource is new and a delete action was performed
225                            // --> set the state of the resource to deleted
226                            res.setState(CmsResourceState.STATE_DELETED);
227                        }
228                    }
229                    reIndexResources(resourcesToDelete);
230                    break;
231                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
232                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
233                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
234                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
235                    // a list of resources has been modified - offline indexes require (re)indexing
236                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
237                    break;
238                default:
239                    // no operation
240            }
241        }
242
243        /**
244         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
245         *
246         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
247         */
248        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
249
250            m_resourcesToIndex.addAll(resourcesToIndex);
251        }
252
253        /**
254         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
255         *
256         * @return the resources to index
257         */
258        protected List<CmsPublishedResource> getResourcesToIndex() {
259
260            List<CmsPublishedResource> result;
261            synchronized (this) {
262                result = m_resourcesToIndex;
263                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
264            }
265            try {
266                CmsObject cms = m_adminCms;
267                CmsProject offline = getOfflineIndexProject();
268                if (offline != null) {
269                    // switch to the offline project if available
270                    cms = OpenCms.initCmsObject(m_adminCms);
271                    cms.getRequestContext().setCurrentProject(offline);
272                }
273                addAdditionallyAffectedResources(cms, result);
274            } catch (CmsException e) {
275                LOG.error(e.getLocalizedMessage(), e);
276            }
277            return result;
278        }
279
280        /**
281         * Initializes this offline search handler, registering the event handlers if required.<p>
282         */
283        protected void initialize() {
284
285            if (m_offlineIndexes.size() > 0) {
286                // there is at least one offline index configured
287                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
288                    // create the offline indexing thread
289                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
290                    // start the offline index thread
291                    m_offlineIndexThread.start();
292                }
293            } else {
294                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
295                    // no offline indexes but thread still running, stop the thread
296                    m_offlineIndexThread.shutDown();
297                    m_offlineIndexThread = null;
298                }
299            }
300            // do this only in case there are offline indexes configured
301            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
302                m_isEventRegistered = true;
303                // register this object as event listener
304                OpenCms.addCmsEventListener(
305                    this,
306                    new int[] {
307                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
308                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
309                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
310                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
311                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
312                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
313                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
314                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
315                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
316            }
317        }
318
319        /**
320         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
321         *
322         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
323         */
324        protected synchronized void reIndexResources(List<CmsResource> resources) {
325
326            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
327            for (CmsResource res : resources) {
328                CmsPublishedResource pubRes = new CmsPublishedResource(res);
329                resourcesToIndex.add(pubRes);
330            }
331            if (resourcesToIndex.size() > 0) {
332                // add the resources found to the offline index thread
333                addResourcesToIndex(resourcesToIndex);
334            }
335        }
336    }
337
338    /**
339     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
340     */
341    protected class CmsSearchOfflineIndexThread extends Thread {
342
343        /** The event handler that triggers this thread. */
344        CmsSearchOfflineHandler m_handler;
345
346        /** Indicates if this thread is still alive. */
347        boolean m_isAlive;
348
349        /** Indicates that an index update thread is currently running. */
350        private boolean m_isUpdating;
351
352        /** If true a manual update (after file upload) was triggered. */
353        private boolean m_updateTriggered;
354
355        /** The wait handle used for signalling when the worker thread has finished. */
356        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
357
358        /**
359         * Constructor.<p>
360         *
361         * @param handler the offline index event handler
362         */
363        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
364
365            super("OpenCms: Offline Search Indexer");
366            m_handler = handler;
367        }
368
369        /**
370         * Gets the wait handle used for signalling when the worker thread has finished.
371         *
372         * @return the wait handle
373         **/
374        public CmsWaitHandle getWaitHandle() {
375
376            return m_waitHandle;
377        }
378
379        /**
380         * @see java.lang.Thread#interrupt()
381         */
382        @Override
383        public void interrupt() {
384
385            super.interrupt();
386            m_updateTriggered = true;
387        }
388
389        /**
390         * @see java.lang.Thread#run()
391         */
392        @Override
393        public void run() {
394
395            // create a log report for the output
396            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
397            long offlineUpdateFrequency = getOfflineUpdateFrequency();
398            m_updateTriggered = false;
399            try {
400                while (m_isAlive) {
401                    if (!m_updateTriggered) {
402                        try {
403                            sleep(offlineUpdateFrequency);
404                        } catch (InterruptedException e) {
405                            // continue the thread after interruption
406                            if (!m_isAlive) {
407                                // the thread has been shut down while sleeping
408                                continue;
409                            }
410                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
411                                // offline update frequency change - clear interrupt status
412                                offlineUpdateFrequency = getOfflineUpdateFrequency();
413                            }
414                            LOG.info(e.getLocalizedMessage(), e);
415                        }
416                    }
417                    if (m_isAlive) {
418                        // set update trigger to false since we do the update now
419                        m_updateTriggered = false;
420                        // get list of resource to update
421                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
422                        if (resourcesToIndex.size() > 0) {
423                            // only start indexing if there is at least one resource
424                            startOfflineUpdateThread(report, resourcesToIndex);
425                        } else {
426                            getWaitHandle().release();
427                        }
428                        // this is just called to clear the interrupt status of the thread
429                        interrupted();
430                    }
431                }
432            } finally {
433                // make sure that live status is reset in case of Exceptions
434                m_isAlive = false;
435            }
436
437        }
438
439        /**
440         * @see java.lang.Thread#start()
441         */
442        @Override
443        public synchronized void start() {
444
445            m_isAlive = true;
446            super.start();
447        }
448
449        /**
450         * Obtains the list of resource to update in the offline index,
451         * then optimizes the list by removing duplicate entries.<p>
452         *
453         * @return the list of resource to update in the offline index
454         */
455        protected List<CmsPublishedResource> getResourcesToIndex() {
456
457            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
458            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
459
460            // Reverse to always keep the last list entries
461            Collections.reverse(resourcesToIndex);
462            for (CmsPublishedResource pubRes : resourcesToIndex) {
463                boolean addResource = true;
464                for (CmsPublishedResource resRes : result) {
465                    if (pubRes.equals(resRes)
466                        && (pubRes.getState() == resRes.getState())
467                        && (pubRes.getMovedState() == resRes.getMovedState())
468                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
469                        // resource already in the update list
470                        addResource = false;
471                        break;
472                    }
473                }
474                if (addResource) {
475                    result.add(pubRes);
476                }
477
478            }
479            Collections.reverse(result);
480            return changeStateOfMoveOriginsToDeleted(result);
481        }
482
483        /**
484         * Shuts down this offline index thread.<p>
485         */
486        protected void shutDown() {
487
488            m_isAlive = false;
489            interrupt();
490            if (m_isUpdating) {
491                long waitTime = getOfflineUpdateFrequency() / 2;
492                int waitSteps = 0;
493                do {
494                    try {
495                        // wait half the time of the offline index frequency for the thread to finish
496                        Thread.sleep(waitTime);
497                    } catch (InterruptedException e) {
498                        // continue
499                        LOG.info(e.getLocalizedMessage(), e);
500                    }
501                    waitSteps++;
502                    // wait 5 times then stop waiting
503                } while ((waitSteps < 5) && m_isUpdating);
504            }
505        }
506
507        /**
508         * Updates the offline search indexes for the given list of resources.<p>
509         *
510         * @param report the report to write the index information to
511         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
512         */
513        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
514
515            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
516            long startTime = System.currentTimeMillis();
517            long waitTime = getOfflineUpdateFrequency() / 2;
518            if (LOG.isDebugEnabled()) {
519                LOG.debug(
520                    Messages.get().getBundle().key(
521                        Messages.LOG_OI_UPDATE_START_1,
522                        Integer.valueOf(resourcesToIndex.size())));
523            }
524
525            m_isUpdating = true;
526            thread.start();
527
528            do {
529                try {
530                    // wait half the time of the offline index frequency for the thread to finish
531                    thread.join(waitTime);
532                } catch (InterruptedException e) {
533                    // continue
534                    LOG.info(e.getLocalizedMessage(), e);
535                }
536                if (thread.isAlive()) {
537                    LOG.warn(
538                        Messages.get().getBundle().key(
539                            Messages.LOG_OI_UPDATE_LONG_2,
540                            Integer.valueOf(resourcesToIndex.size()),
541                            Long.valueOf(System.currentTimeMillis() - startTime)));
542                }
543            } while (thread.isAlive());
544            m_isUpdating = false;
545
546            if (LOG.isDebugEnabled()) {
547                LOG.debug(
548                    Messages.get().getBundle().key(
549                        Messages.LOG_OI_UPDATE_FINISH_2,
550                        Integer.valueOf(resourcesToIndex.size()),
551                        Long.valueOf(System.currentTimeMillis() - startTime)));
552            }
553        }
554
555        /**
556         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
557         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
558         *
559         * @param resourcesToIndex the resources to index
560         *
561         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
562         */
563        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
564            List<CmsPublishedResource> resourcesToIndex) {
565
566            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
567            for (CmsPublishedResource resource : resourcesToIndex) {
568                if (resource.getState().isDeleted()) {
569                    // we don't want the last path to be from a deleted resource
570                    continue;
571                }
572                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
573            }
574            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
575            for (CmsPublishedResource resource : resourcesToIndex) {
576                if (resource.getState().isDeleted()) {
577                    result.add(resource);
578                    continue;
579                }
580                String lastValidPath = lastValidPaths.get(resource.getStructureId());
581                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
582                    result.add(resource);
583                } else {
584                    result.add(
585                        new CmsPublishedResource(
586                            resource.getStructureId(),
587                            resource.getResourceId(),
588                            resource.getPublishTag(),
589                            resource.getRootPath(),
590                            resource.getType(),
591                            resource.isFolder(),
592                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
593                            resource.getSiblingCount()));
594                }
595            }
596            return result;
597        }
598    }
599
600    /**
601     * An offline index worker Thread runs each time for every offline index update action.<p>
602     *
603     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
604     * problems if a single operation "hangs" the Tread.<p>
605     */
606    protected class CmsSearchOfflineIndexWorkThread extends Thread {
607
608        /** The report to write the index information to. */
609        I_CmsReport m_report;
610
611        /** The list of {@link CmsPublishedResource} objects to index. */
612        List<CmsPublishedResource> m_resourcesToIndex;
613
614        /**
615         * Updates the offline search indexes for the given list of resources.<p>
616         *
617         * @param report the report to write the index information to
618         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
619         */
620        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
621
622            super("OpenCms: Offline Search Index Worker");
623            m_report = report;
624            m_resourcesToIndex = resourcesToIndex;
625        }
626
627        /**
628         * @see java.lang.Thread#run()
629         */
630        @Override
631        public void run() {
632
633            updateIndexOffline(m_report, m_resourcesToIndex);
634            if (m_offlineIndexThread != null) {
635                m_offlineIndexThread.getWaitHandle().release();
636            }
637        }
638    }
639
    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);

    /** The default value used for generating search result excerpts (1024 chars). */
    public static final int DEFAULT_EXCERPT_LENGTH = 1024;

    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;

    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;

    /** The default update frequency for offline indexes (15000 msec = 15 sec). Note that the constant name contains a typo ("FREQNENCY"). */
    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;

    /** The default maximal wait time for re-indexing after editing a content (30000 msec = 30 sec). */
    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;

    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
    public static final int DEFAULT_TIMEOUT = 60000;

    /** Scheduler parameter: Update only a specified list of indexes. */
    public static final String JOB_PARAM_INDEXLIST = "indexList";

    /** Scheduler parameter: Write the output of the update to the logfile. */
    public static final String JOB_PARAM_WRITELOG = "writeLog";

    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";

    /** The log object for this class. */
    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);

    /** The administrator OpenCms user context to access OpenCms VFS resources. */
    protected CmsObject m_adminCms;

    /** The list of indexes that are configured for offline index mode. */
    protected List<I_CmsSearchIndex> m_offlineIndexes;

    /** The thread used for offline indexing. */
    protected CmsSearchOfflineIndexThread m_offlineIndexThread;

    /** Configured analyzers for languages using &lt;analyzer&gt;. */
    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;

    /** Stores the offline update frequency while indexing is paused. */
    private long m_configuredOfflineIndexingFrequency;

    /** The Solr core container. */
    private CoreContainer m_coreContainer;

    /** A list of document factory configurations. */
    private List<CmsSearchDocumentType> m_documentTypeConfigs;

    /** A map of document factories keyed first by their name and then by their extraction keys. */
    private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes;

    /** The set of all globally available extraction keys for document factories. */
    private Set<String> m_extractionKeys;

    /** The max age for extraction results to remain in the cache. */
    private float m_extractionCacheMaxAge;

    /** The cache for the extraction results. */
    private CmsExtractionResultCache m_extractionResultCache;

    /** Contains the available field configurations. */
    private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations;

    /** The force unlock type. */
    private CmsSearchForceUnlockMode m_forceUnlockMode;

    /** The class used to highlight the search terms in the excerpt of a search result. */
    private I_CmsTermHighlighter m_highlighter;

    /** A list of search indexes. */
    private List<I_CmsSearchIndex> m_indexes;

    /** Seconds to wait for an index lock (initial value is 10). */
    private int m_indexLockMaxWaitSeconds = 10;

    /** Configured index sources. */
    private Map<String, CmsSearchIndexSource> m_indexSources;

    /** The max. char. length of the excerpt in the search result. */
    private int m_maxExcerptLength;

    /** The maximum number of modifications before a commit in the search index is triggered. */
    private int m_maxModificationsBeforeCommit;

    /** The offline index search handler. */
    private CmsSearchOfflineHandler m_offlineHandler;

    /** The update frequency of the offline indexer in milliseconds. */
    private long m_offlineUpdateFrequency;

    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
    private long m_maxIndexWaitTime;

    /** Path to index files below WEB-INF/. */
    private String m_path;

    /** The Solr configuration. */
    private CmsSolrConfiguration m_solrConfig;

    /** Timeout for abandoning indexing thread. */
    private long m_timeout;
747
748    /**
749     * Default constructor when called as cron job.<p>
750     */
751    public CmsSearchManager() {
752
753        m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>();
754        m_extractionKeys = new HashSet<String>();
755        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
756        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
757        m_indexes = new ArrayList<I_CmsSearchIndex>();
758        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
759        m_offlineHandler = new CmsSearchOfflineHandler();
760        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
761        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
762        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
763        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
764        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
765
766        m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>();
767        // make sure we have a "standard" field configuration
768        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
769
770        if (CmsLog.INIT.isInfoEnabled()) {
771            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
772        }
773    }
774
775    /**
776     * Returns an analyzer for the given class name.<p>
777     *
778     * @param className the class name of the analyzer
779     *
780     * @return the appropriate lucene analyzer
781     *
782     * @throws Exception if something goes wrong
783     */
784    public static Analyzer getAnalyzer(String className) throws Exception {
785
786        Analyzer analyzer = null;
787        Class<?> analyzerClass;
788        try {
789            analyzerClass = Class.forName(className);
790        } catch (ClassNotFoundException e) {
791            // allow Lucene standard classes to be written in a short form
792            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
793        }
794
795        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
796        if (StandardAnalyzer.class.equals(analyzerClass)) {
797            // the Lucene standard analyzer is used - but without any stopwords.
798            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
799        } else {
800            analyzer = (Analyzer)analyzerClass.newInstance();
801        }
802        return analyzer;
803    }
804
805    /**
806     * Returns the Solr index configured with the parameters name.
807     * The parameters must contain a key/value pair with an existing
808     * Solr index, otherwise <code>null</code> is returned.<p>
809     *
810     * @param cms the current context
811     * @param params the parameter map
812     *
813     * @return the best matching Solr index
814     */
815    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
816
817        String indexName = null;
818        CmsSolrIndex index = null;
819        // try to get the index name from the parameters: 'core' or 'index'
820        if (params != null) {
821            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
822            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
823            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
824            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
825            : null);
826        }
827        if (indexName == null) {
828            // if no parameter is specified try to use the default online/offline indexes by context
829            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
830            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
831            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
832        }
833        // try to get the index
834        index = OpenCms.getSearchManager().getIndexSolr(indexName);
835        if (index == null) {
836            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
837            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
838            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
839                index = solrs.get(0);
840            }
841        }
842        return index;
843    }
844
845    /**
846     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
847     *
848     * @param indexName the name of the index to check
849     *
850     * @return <code>true</code> if the index for the given name is a Lucene index
851     */
852    public static boolean isLuceneIndex(String indexName) {
853
854        I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
855        return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex));
856    }
857
858    /**
859     * Adds an analyzer.<p>
860     *
861     * @param analyzer an analyzer
862     */
863    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
864
865        m_analyzers.put(analyzer.getLocale(), analyzer);
866
867        if (CmsLog.INIT.isInfoEnabled()) {
868            CmsLog.INIT.info(
869                Messages.get().getBundle().key(
870                    Messages.INIT_ADD_ANALYZER_2,
871                    analyzer.getLocale(),
872                    analyzer.getClassName()));
873        }
874    }
875
876    /**
877     * Adds a document type.<p>
878     *
879     * @param documentType a document type
880     */
881    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
882
883        m_documentTypeConfigs.add(documentType);
884
885        if (CmsLog.INIT.isInfoEnabled()) {
886            CmsLog.INIT.info(
887                Messages.get().getBundle().key(
888                    Messages.INIT_SEARCH_DOC_TYPES_2,
889                    documentType.getName(),
890                    documentType.getClassName()));
891        }
892    }
893
894    /**
895     * Adds a search field configuration to the search manager.<p>
896     *
897     * @param fieldConfiguration the search field configuration to add
898     */
899    public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
900
901        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
902    }
903
904    /**
905     * Adds a search index to the configuration.<p>
906     *
907     * @param searchIndex the search index to add
908     */
909    public void addSearchIndex(I_CmsSearchIndex searchIndex) {
910
911        if (!searchIndex.isInitialized()) {
912            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
913                try {
914                    searchIndex.initialize();
915                } catch (CmsException e) {
916                    // should never happen
917                    LOG.error(e.getMessage(), e);
918                }
919            }
920        }
921
922        // name: not null or emtpy and unique
923        String name = searchIndex.getName();
924        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
925            throw new CmsIllegalArgumentException(
926                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
927        }
928        if (m_indexSources.keySet().contains(name)) {
929            throw new CmsIllegalArgumentException(
930                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
931        }
932
933        m_indexes.add(searchIndex);
934        if (m_adminCms != null) {
935            initOfflineIndexes();
936        }
937
938        if (CmsLog.INIT.isInfoEnabled()) {
939            CmsLog.INIT.info(
940                Messages.get().getBundle().key(
941                    Messages.INIT_ADD_SEARCH_INDEX_2,
942                    searchIndex.getName(),
943                    searchIndex.getProject()));
944        }
945    }
946
947    /**
948     * Adds a search index source configuration.<p>
949     *
950     * @param searchIndexSource a search index source configuration
951     */
952    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
953
954        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
955
956        if (CmsLog.INIT.isInfoEnabled()) {
957            CmsLog.INIT.info(
958                Messages.get().getBundle().key(
959                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
960                    searchIndexSource.getName(),
961                    searchIndexSource.getIndexerClassName()));
962        }
963    }
964
965    /**
966     * Implements the event listener of this class.<p>
967     *
968     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
969     */
970    public void cmsEvent(CmsEvent event) {
971
972        switch (event.getType()) {
973            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
974                List<String> indexNames = null;
975                if ((event.getData() != null)
976                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
977                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
978                    indexNames = CmsStringUtil.splitAsList(
979                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
980                        ",",
981                        true);
982                }
983                try {
984                    if (LOG.isDebugEnabled()) {
985                        LOG.debug(
986                            Messages.get().getBundle().key(
987                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
988                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
989                            new Exception());
990                    }
991                    if (indexNames == null) {
992                        rebuildAllIndexes(getEventReport(event));
993                    } else {
994                        rebuildIndexes(indexNames, getEventReport(event));
995                    }
996                } catch (CmsException e) {
997                    if (LOG.isErrorEnabled()) {
998                        LOG.error(
999                            Messages.get().getBundle().key(
1000                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
1001                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1002                            e);
1003                    }
1004                }
1005                break;
1006            case I_CmsEventListener.EVENT_CLEAR_CACHES:
1007                if (LOG.isDebugEnabled()) {
1008                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
1009                }
1010                break;
1011            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
1012                // event data contains a list of the published resources
1013                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1014                if (LOG.isDebugEnabled()) {
1015                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1016                }
1017                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1018                if (LOG.isDebugEnabled()) {
1019                    LOG.debug(
1020                        Messages.get().getBundle().key(
1021                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1022                            publishHistoryId));
1023                }
1024                break;
1025            case I_CmsEventListener.EVENT_REINDEX_OFFLINE:
1026            case I_CmsEventListener.EVENT_REINDEX_ONLINE:
1027                boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType();
1028                Map<String, Object> eventData = event.getData();
1029                CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID);
1030                CmsUser user = null;
1031                try {
1032                    user = m_adminCms.readUser(userId);
1033                } catch (Throwable t) {
1034                    // should never happen
1035                }
1036                try {
1037                    SEARCH_MANAGER_LOCK.lock();
1038                    if (LOG.isDebugEnabled()) {
1039                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0));
1040                    }
1041                    CmsObject cms = m_adminCms;
1042                    if (!isOnline) {
1043                        OpenCms.initCmsObject(m_adminCms);
1044                        cms.getRequestContext().setCurrentProject(
1045                            cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID)));
1046                    }
1047                    @SuppressWarnings("unchecked")
1048                    List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES);
1049                    I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT);
1050                    List<CmsResource> resourcesToIndex = new ArrayList<>();
1051                    for (CmsResource res : resources) {
1052                        if (res.isFile()) {
1053                            resourcesToIndex.add(res);
1054                        } else {
1055                            try {
1056                                resourcesToIndex.addAll(
1057                                    cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true));
1058                            } catch (CmsException e) {
1059                                LOG.error(e, e);
1060                            }
1061                        }
1062                    }
1063                    // we reindex and prevent using cached results
1064                    cleanExtractionCache();
1065                    List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map(
1066                        res -> new CmsPublishedResource(res)).collect(Collectors.toList());
1067                    if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) {
1068                        addAdditionallyAffectedResources(cms, publishedResourcesToIndex);
1069                    }
1070                    if (isOnline) {
1071                        updateAllIndexes(
1072                            m_adminCms,
1073                            publishedResourcesToIndex,
1074                            new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE));
1075                    } else {
1076                        updateIndexOffline(report, publishedResourcesToIndex);
1077                    }
1078                    cms = null;
1079                    SEARCH_MANAGER_LOCK.unlock();
1080                    if (null != user) {
1081                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1082                        OpenCms.getSessionManager().sendBroadcast(
1083                            null,
1084                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0),
1085                            user,
1086                            ContentMode.html);
1087                    }
1088                    if (LOG.isDebugEnabled()) {
1089                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0));
1090                    }
1091
1092                } catch (Throwable e) {
1093                    if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) {
1094                        SEARCH_MANAGER_LOCK.unlock();
1095                    }
1096                    if (null != user) {
1097                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1098                        OpenCms.getSessionManager().sendBroadcast(
1099                            null,
1100                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0),
1101                            user,
1102                            ContentMode.html);
1103                    }
1104                    if (LOG.isDebugEnabled()) {
1105                        LOG.error(
1106                            Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()),
1107                            e);
1108                    } else if (LOG.isErrorEnabled()) {
1109                        LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()));
1110                    }
1111                }
1112                break;
1113            default:
1114                // no operation
1115        }
1116    }
1117
1118    /**
1119     * Returns all Solr index.<p>
1120     *
1121     * @return all Solr indexes
1122     */
1123    public List<CmsSolrIndex> getAllSolrIndexes() {
1124
1125        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1126        for (String indexName : getIndexNames()) {
1127            CmsSolrIndex index = getIndexSolr(indexName);
1128            if (index != null) {
1129                result.add(index);
1130            }
1131        }
1132        return result;
1133    }
1134
1135    /**
1136     * Returns an analyzer for the given language.<p>
1137     *
1138     * The analyzer is selected according to the analyzer configuration.<p>
1139     *
1140     * @param locale the locale to get the analyzer for
1141     * @return the appropriate lucene analyzer
1142     *
1143     * @throws CmsSearchException if something goes wrong
1144     */
1145    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1146
1147        Analyzer analyzer = null;
1148        String className = null;
1149
1150        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1151        if (analyzerConf == null) {
1152            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1153        }
1154
1155        try {
1156            analyzer = getAnalyzer(analyzerConf.getClassName());
1157        } catch (Exception e) {
1158            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1159        }
1160
1161        return analyzer;
1162    }
1163
1164    /**
1165     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1166     *
1167     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1168     *
1169     * @return an unmodifiable view of the Analyzers Map
1170     */
1171    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1172
1173        return Collections.unmodifiableMap(m_analyzers);
1174    }
1175
1176    /**
1177     * Returns the search analyzer for the given locale.<p>
1178     *
1179     * @param locale the locale to get the analyzer for
1180     *
1181     * @return the search analyzer for the given locale
1182     */
1183    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1184
1185        return m_analyzers.get(locale);
1186    }
1187
1188    /**
1189     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1190     *
1191     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1192     */
1193    public String getDirectory() {
1194
1195        return m_path;
1196    }
1197
1198    /**
1199     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1200     *
1201     * @return the Solr home directory
1202     */
1203    public String getDirectorySolr() {
1204
1205        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1206    }
1207
1208    /**
1209     * Returns the document factory configured under the provided name.
1210     * @param docTypeName the name of the document type.
1211     * @return the factory for the provided name.
1212     */
1213    public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) {
1214
1215        Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName);
1216        if (factoryMap != null) {
1217            Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator();
1218            if (factoryIt.hasNext()) {
1219                return factoryMap.values().iterator().next();
1220            }
1221        }
1222        return null;
1223    }
1224
1225    /**
1226     * Returns a document type config.<p>
1227     *
1228     * @param name the name of the document type config
1229     * @return the document type config.
1230     */
1231    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1232
1233        // this is really used only for the search manager GUI,
1234        // so performance is not an issue and no lookup map is generated
1235        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1236            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1237            if (type.getName().equals(name)) {
1238                return type;
1239            }
1240        }
1241        return null;
1242    }
1243
1244    /**
1245     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1246     *
1247     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1248     */
1249    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1250
1251        return Collections.unmodifiableList(m_documentTypeConfigs);
1252    }
1253
1254    /**
1255     * Returns the document type keys used to specify the correct document factory.
1256     *
1257     * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys.
1258     *
1259     * @param resource the resource to generate the list of document type keys for.
1260     * @return the document type keys.
1261     */
1262    public List<String> getDocumentTypeKeys(CmsResource resource) {
1263
1264        // first get the MIME type of the resource
1265        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1266        String resourceType = null;
1267        try {
1268            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1269        } catch (CmsLoaderException e) {
1270            // ignore, unknown resource type, resource can not be indexed
1271            LOG.info(e.getLocalizedMessage(), e);
1272        }
1273        return getDocumentTypeKeys(resourceType, mimeType);
1274    }
1275
    /**
     * Returns the document type keys used to specify the correct document factory.
     * One resource typically has more than one key. The document factories are matched
     * in the provided order and the first matching factory is used.
     *
     * The keys for type name "typename" and mimetype "mimetype" would be a subset of:
     * <ul>
     *  <li><code>typename_mimetype</code></li>
     *  <li><code>typename</code></li>
     *  <li>if <code>typename</code> is a sub-type of <code>containerpage</code>
     *      <ul>
     *          <li><code>containerpage_mimetype</code></li>
     *          <li><code>containerpage</code></li>
     *      </ul>
     *  </li>
     *  <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code>
     *      <ul>
     *          <li><code>xmlcontent_mimetype</code></li>
     *          <li><code>xmlcontent</code></li>
     *      </ul>
     *  </li>
     *  <li><code>__unconfigured___mimetype</code></li>
     *  <li><code>__unconfigured__</code></li>
     *  <li><code>__all___mimetype</code></li>
     *  <li><code>__all__</code></li>
     * </ul>
     * Note that all keys except the "__all__"-keys are only added as long as globally
     * there is no matching factory for the key.
     * This in particular means that a factory matching "typename" will never be used
     * if you have a factory for "typename_mimetype" - even if this is not configured
     * for the used index source. Eventually, the content will not be indexed in such cases.
     *
     * If the resource type is <code>null</code>, an empty list is returned.
     *
     * @param resourceType the resource type to generate the list of document type keys for.
     * @param mimeType the mime type to generate the list of document type keys for.
     * @return the document type keys.
     */
    public List<String> getDocumentTypeKeys(String resourceType, String mimeType) {

        List<String> result = new ArrayList<>(8);
        if (null != resourceType) {
            // most specific key first: resource type combined with the mime type
            String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
            result.add(currentKey);
            if (!m_extractionKeys.contains(currentKey)) {
                // no factory is globally available for the combined key, continue with the type only
                currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null);
                result.add(currentKey);
                if (!m_extractionKeys.contains(currentKey)) {
                    // set as soon as one of the keys added below is globally available
                    boolean hasGlobalMatch = false;
                    try {
                        String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName();
                        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType);
                        if (!resourceType.equals(containerpageTypeName)) {
                            if (type instanceof CmsResourceTypeXmlContainerPage) {
                                // NOTE(review): looks redundant with the outer check against
                                // containerpageTypeName - confirm both calls return the same name
                                if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) {
                                    currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType);
                                    result.add(currentKey);
                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                    if (!hasGlobalMatch) {
                                        currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null);
                                        result.add(currentKey);
                                        hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                    }
                                }
                            }
                        }
                        String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName();
                        // NOTE(review): this compares with the container page type name, not with
                        // xmlcontentTypeName. As written, the xmlcontent keys are skipped for the
                        // container page type itself and added a second time for the xmlcontent
                        // type itself - confirm this is intended
                        if (!resourceType.equals(containerpageTypeName)) {
                            if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) {
                                currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType);
                                result.add(currentKey);
                                hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                if (!hasGlobalMatch) {
                                    currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null);
                                    result.add(currentKey);
                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                }
                            }
                        }
                    } catch (Throwable t) {
                        // the keys added so far are kept, the fallback keys below are still added
                        LOG.warn("Could not read type for name \"" + resourceType + "\".", t);
                    }
                    if (!hasGlobalMatch) {
                        // fallback keys for all types without a globally matching factory
                        result.add(
                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType));
                        result.add(
                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null));
                    }
                }
            }
            // the keys for all types are always added last
            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType));
            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null));
        }
        return result;

    }
1369
1370    /**
1371     * Returns the map from document type keys to document factories with all entries for the provided document type names.
1372     * @param documentTypeNames list of document type names to generate the map for.
1373     * @return the map from document type keys to document factories.
1374     */
1375    public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) {
1376
1377        Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>();
1378        if (null != documentTypeNames) {
1379            // Iterate the list in reverse order to prefer factories that are added by document types listed earlier.
1380            ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size());
1381            while (typesIterator.hasPrevious()) {
1382                Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous());
1383                if (null != factories) {
1384                    result.putAll(factories);
1385                }
1386            }
1387        }
1388        return result;
1389    }
1390
1391    /**
1392     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
1393     *
1394     * @return the maximum age a text extraction result is kept in the cache (in hours)
1395     */
1396    public float getExtractionCacheMaxAge() {
1397
1398        return m_extractionCacheMaxAge;
1399    }
1400
1401    /**
1402     * Returns the search field configuration with the given name.<p>
1403     *
1404     * In case no configuration is available with the given name, <code>null</code> is returned.<p>
1405     *
1406     * @param name the name to get the search field configuration for
1407     *
1408     * @return the search field configuration with the given name
1409     */
1410    public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) {
1411
1412        return m_fieldConfigurations.get(name);
1413    }
1414
1415    /**
1416     * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p>
1417     *
1418     * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries
1419     */
1420    public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() {
1421
1422        List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>(
1423            m_fieldConfigurations.values());
1424        Collections.sort(result);
1425        return Collections.unmodifiableList(result);
1426    }
1427
1428    /**
1429     * Returns the Lucene search field configurations only.<p>
1430     *
1431     * @return the Lucene search field configurations
1432     */
1433    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1434
1435        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1436        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1437            if (conf instanceof CmsLuceneFieldConfiguration) {
1438                result.add((CmsLuceneFieldConfiguration)conf);
1439            }
1440        }
1441        Collections.sort(result);
1442        return Collections.unmodifiableList(result);
1443    }
1444
1445    /**
1446     * Returns the Solr search field configurations only.<p>
1447     *
1448     * @return the Solr search field configurations
1449     */
1450    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1451
1452        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1453        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1454            if (conf instanceof CmsSolrFieldConfiguration) {
1455                result.add((CmsSolrFieldConfiguration)conf);
1456            }
1457        }
1458        Collections.sort(result);
1459        return Collections.unmodifiableList(result);
1460    }
1461
1462    /**
1463     * Returns the force unlock mode during indexing.<p>
1464     *
1465     * @return the force unlock mode during indexing
1466     */
1467    public CmsSearchForceUnlockMode getForceunlock() {
1468
1469        return m_forceUnlockMode;
1470    }
1471
1472    /**
1473     * Returns the highlighter.<p>
1474     *
1475     * @return the highlighter
1476     */
1477    public I_CmsTermHighlighter getHighlighter() {
1478
1479        return m_highlighter;
1480    }
1481
1482    /**
1483     * Returns the Lucene search index configured with the given name.<p>
1484     * The index must exist, otherwise <code>null</code> is returned.
1485     *
1486     * @param indexName then name of the requested search index
1487     *
1488     * @return the Lucene search index configured with the given name
1489     */
1490    public I_CmsSearchIndex getIndex(String indexName) {
1491
1492        for (I_CmsSearchIndex index : m_indexes) {
1493            if (indexName.equalsIgnoreCase(index.getName())) {
1494                return index;
1495            }
1496        }
1497        return null;
1498    }
1499
1500    /**
1501     * Returns the seconds to wait for an index lock during an update operation.<p>
1502     *
1503     * @return the seconds to wait for an index lock during an update operation
1504     */
1505    public int getIndexLockMaxWaitSeconds() {
1506
1507        return m_indexLockMaxWaitSeconds;
1508    }
1509
1510    /**
1511     * Returns the names of all configured indexes.<p>
1512     *
1513     * @return list of names
1514     */
1515    public List<String> getIndexNames() {
1516
1517        List<String> indexNames = new ArrayList<String>();
1518        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1519            indexNames.add((m_indexes.get(i)).getName());
1520        }
1521
1522        return indexNames;
1523    }
1524
1525    /**
1526     * Returns the Solr index configured with the given name.<p>
1527     * The index must exist, otherwise <code>null</code> is returned.
1528     *
1529     * @param indexName then name of the requested Solr index
1530     * @return the Solr index configured with the given name
1531     */
1532    public CmsSolrIndex getIndexSolr(String indexName) {
1533
1534        I_CmsSearchIndex index = getIndex(indexName);
1535        if (index instanceof CmsSolrIndex) {
1536            return (CmsSolrIndex)index;
1537        }
1538        return null;
1539    }
1540
1541    /**
1542     * Returns a search index source for a specified source name.<p>
1543     *
1544     * @param sourceName the name of the index source
1545     * @return a search index source
1546     */
1547    public CmsSearchIndexSource getIndexSource(String sourceName) {
1548
1549        return m_indexSources.get(sourceName);
1550    }
1551
1552    /**
1553     * Returns the max. excerpt length.<p>
1554     *
1555     * @return the max excerpt length
1556     */
1557    public int getMaxExcerptLength() {
1558
1559        return m_maxExcerptLength;
1560    }
1561
1562    /**
1563     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
1564     *
1565     * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds)
1566     */
1567    public long getMaxIndexWaitTime() {
1568
1569        return m_maxIndexWaitTime;
1570    }
1571
1572    /**
1573     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
1574     *
1575     * @return the maximum number of modifications before a commit in the search index is triggered
1576     */
1577    public int getMaxModificationsBeforeCommit() {
1578
1579        return m_maxModificationsBeforeCommit;
1580    }
1581
1582    /**
1583     * Returns the update frequency of the offline indexer in milliseconds.<p>
1584     *
1585     * @return the update frequency of the offline indexer in milliseconds
1586     */
1587    public long getOfflineUpdateFrequency() {
1588
1589        return m_offlineUpdateFrequency;
1590    }
1591
1592    /**
1593     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1594     *
1595     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1596     */
1597    public List<I_CmsSearchIndex> getSearchIndexes() {
1598
1599        return Collections.unmodifiableList(m_indexes);
1600    }
1601
1602    /**
1603     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1604     *
1605     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1606     */
1607    public List<I_CmsSearchIndex> getSearchIndexesAll() {
1608
1609        return Collections.unmodifiableList(m_indexes);
1610    }
1611
1612    /**
1613     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1614     *
1615     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1616     */
1617    public List<CmsSolrIndex> getSearchIndexesSolr() {
1618
1619        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1620        for (I_CmsSearchIndex index : m_indexes) {
1621            if (index instanceof CmsSolrIndex) {
1622                indexes.add((CmsSolrIndex)index);
1623            }
1624        }
1625        return Collections.unmodifiableList(indexes);
1626    }
1627
1628    /**
1629     * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
1630     *
1631     * @return an unmodifiable view (read-only) of the SearchIndexSources Map
1632     */
1633    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {
1634
1635        return Collections.unmodifiableMap(m_indexSources);
1636    }
1637
1638    /**
1639     * Return singleton instance of the OpenCms spellchecker.<p>
1640     *
1641     * @return instance of CmsSolrSpellchecker.
1642     */
1643    public CmsSolrSpellchecker getSolrDictionary() {
1644
1645        // get the core container that contains one core for each configured index
1646        if (m_coreContainer == null) {
1647            m_coreContainer = createCoreContainer();
1648        }
1649        return CmsSolrSpellchecker.getInstance(m_coreContainer);
1650    }
1651
1652    /**
1653     * Returns the Solr configuration.<p>
1654     *
1655     * @return the Solr configuration
1656     */
1657    public CmsSolrConfiguration getSolrServerConfiguration() {
1658
1659        return m_solrConfig;
1660    }
1661
1662    /**
1663     * Returns the timeout to abandon threads indexing a resource.<p>
1664     *
1665     * @return the timeout to abandon threads indexing a resource
1666     */
1667    public long getTimeout() {
1668
1669        return m_timeout;
1670    }
1671
1672    /**
1673     * Initializes the search manager.<p>
1674     *
1675     * @param cms the cms object
1676     *
1677     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1678     */
1679    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1680
1681        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1682        try {
1683            // store the Admin cms to index Cms resources
1684            m_adminCms = OpenCms.initCmsObject(cms);
1685        } catch (CmsException e) {
1686            // this should never happen
1687            LOG.error(e.getLocalizedMessage(), e);
1688        }
1689        // make sure the site root is the root site
1690        m_adminCms.getRequestContext().setSiteRoot("/");
1691
1692        // create the extraction result cache
1693        m_extractionResultCache = new CmsExtractionResultCache(
1694            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1695            "/extractCache");
1696        initializeFieldConfigurations();
1697        initializeIndexes();
1698        initOfflineIndexes();
1699
1700        // register this object as event listener
1701        OpenCms.addCmsEventListener(
1702            this,
1703            new int[] {
1704                I_CmsEventListener.EVENT_CLEAR_CACHES,
1705                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1706                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES,
1707                I_CmsEventListener.EVENT_REINDEX_OFFLINE,
1708                I_CmsEventListener.EVENT_REINDEX_ONLINE});
1709    }
1710
1711    /**
1712     * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations.
1713     */
1714    public void initializeFieldConfigurations() {
1715
1716        for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) {
1717            config.init();
1718        }
1719
1720    }
1721
1722    /**
1723     * Initializes all configured document types, index sources and search indexes.<p>
1724     *
1725     * This methods needs to be called if after a change in the index configuration has been made.
1726     */
1727    public void initializeIndexes() {
1728
1729        initAvailableDocumentTypes();
1730        initIndexSources();
1731        initSearchIndexes();
1732    }
1733
1734    /**
1735     * Initialize the offline index handler, require after an offline index has been added.<p>
1736     */
1737    public void initOfflineIndexes() {
1738
1739        // check which indexes are configured as offline indexes
1740        List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>();
1741        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
1742        while (i.hasNext()) {
1743            I_CmsSearchIndex index = i.next();
1744            if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1745                // this is an offline index
1746                offlineIndexes.add(index);
1747            }
1748        }
1749        m_offlineIndexes = offlineIndexes;
1750        m_offlineHandler.initialize();
1751
1752    }
1753
1754    /**
1755     * Initializes the spell check index.<p>
1756     *
1757     * @param adminCms the ROOT_ADMIN cms context
1758     */
1759    public void initSpellcheckIndex(CmsObject adminCms) {
1760
1761        if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) {
1762            final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary();
1763            if (spellchecker != null) {
1764
1765                Runnable initRunner = new Runnable() {
1766
1767                    public void run() {
1768
1769                        try {
1770                            spellchecker.parseAndAddDictionaries(adminCms);
1771                        } catch (CmsRoleViolationException e) {
1772                            LOG.error(e.getLocalizedMessage(), e);
1773                        }
1774                    }
1775                };
1776                new Thread(initRunner).start();
1777            }
1778        }
1779    }
1780
1781    /**
1782     * Returns if the offline indexing is paused.<p>
1783     *
1784     * @return <code>true</code> if the offline indexing is paused
1785     */
1786    public boolean isOfflineIndexingPaused() {
1787
1788        return m_offlineUpdateFrequency == Long.MAX_VALUE;
1789    }
1790
1791    /**
1792     * Updates the indexes from as a scheduled job.<p>
1793     *
1794     * @param cms the OpenCms user context to use when reading resources from the VFS
1795     * @param parameters the parameters for the scheduled job
1796     *
1797     * @throws Exception if something goes wrong
1798     *
1799     * @return the String to write in the scheduler log
1800     *
1801     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1802     */
1803    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1804
1805        CmsSearchManager manager = OpenCms.getSearchManager();
1806
1807        I_CmsReport report = null;
1808        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1809
1810        if (writeLog) {
1811            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1812        }
1813
1814        List<String> updateList = null;
1815        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1816        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1817            // index list has been provided as job parameter
1818            updateList = new ArrayList<String>();
1819            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1820            for (int i = 0; i < indexNames.length; i++) {
1821                // check if the index actually exists
1822                if (manager.getIndex(indexNames[i]) != null) {
1823                    updateList.add(indexNames[i]);
1824                } else {
1825                    if (LOG.isWarnEnabled()) {
1826                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1827                    }
1828                }
1829            }
1830        }
1831
1832        long startTime = System.currentTimeMillis();
1833
1834        if (updateList == null) {
1835            // all indexes need to be updated
1836            manager.rebuildAllIndexes(report);
1837        } else {
1838            // rebuild only the selected indexes
1839            manager.rebuildIndexes(updateList, report);
1840        }
1841
1842        long runTime = System.currentTimeMillis() - startTime;
1843
1844        String finishMessage = Messages.get().getBundle().key(
1845            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1846            CmsStringUtil.formatRuntime(runTime));
1847
1848        if (LOG.isInfoEnabled()) {
1849            LOG.info(finishMessage);
1850        }
1851        return finishMessage;
1852    }
1853
1854    /**
1855     * Pauses the offline indexing.<p>
1856     * May take some time, because the indexes are updated first.<p>
1857     */
1858    public void pauseOfflineIndexing() {
1859
1860        if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
1861            m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1862            m_offlineUpdateFrequency = Long.MAX_VALUE;
1863            updateOfflineIndexes(0);
1864        }
1865    }
1866
1867    /**
1868     * Rebuilds (if required creates) all configured indexes.<p>
1869     *
1870     * @param report the report object to write messages (or <code>null</code>)
1871     *
1872     * @throws CmsException if something goes wrong
1873     */
1874    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1875
1876        try {
1877            SEARCH_MANAGER_LOCK.lock();
1878
1879            CmsMessageContainer container = null;
1880            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1881                // iterate all configured search indexes
1882                I_CmsSearchIndex searchIndex = m_indexes.get(i);
1883                try {
1884                    // update the index
1885                    updateIndex(searchIndex, report, null);
1886                } catch (CmsException e) {
1887                    container = new CmsMessageContainer(
1888                        Messages.get(),
1889                        Messages.ERR_INDEX_REBUILD_ALL_1,
1890                        new Object[] {searchIndex.getName()});
1891                    LOG.error(
1892                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1893                        e);
1894                }
1895            }
1896            // clean up the extraction result cache
1897            cleanExtractionCache();
1898            if (container != null) {
1899                // throw stored exception
1900                throw new CmsSearchException(container);
1901            }
1902        } finally {
1903            SEARCH_MANAGER_LOCK.unlock();
1904        }
1905    }
1906
1907    /**
1908     * Rebuilds (if required creates) the index with the given name.<p>
1909     *
1910     * @param indexName the name of the index to rebuild
1911     * @param report the report object to write messages (or <code>null</code>)
1912     *
1913     * @throws CmsException if something goes wrong
1914     */
1915    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1916
1917        try {
1918            SEARCH_MANAGER_LOCK.lock();
1919            // get the search index by name
1920            I_CmsSearchIndex index = getIndex(indexName);
1921            // update the index
1922            updateIndex(index, report, null);
1923            // clean up the extraction result cache
1924            cleanExtractionCache();
1925        } finally {
1926            SEARCH_MANAGER_LOCK.unlock();
1927        }
1928    }
1929
1930    /**
1931     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1932     *
1933     * @param indexNames the names (String) of the index to rebuild
1934     * @param report the report object to write messages (or <code>null</code>)
1935     *
1936     * @throws CmsException if something goes wrong
1937     */
1938    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1939
1940        try {
1941            SEARCH_MANAGER_LOCK.lock();
1942            Iterator<String> i = indexNames.iterator();
1943            while (i.hasNext()) {
1944                String indexName = i.next();
1945                // get the search index by name
1946                I_CmsSearchIndex index = getIndex(indexName);
1947                if (index != null) {
1948                    // update the index
1949                    updateIndex(index, report, null);
1950                } else {
1951                    if (LOG.isWarnEnabled()) {
1952                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1953                    }
1954                }
1955            }
1956            // clean up the extraction result cache
1957            cleanExtractionCache();
1958        } finally {
1959            SEARCH_MANAGER_LOCK.unlock();
1960        }
1961    }
1962
1963    /**
1964     * Registers a new Solr core for the given index.<p>
1965     *
1966     * @param index the index to register a new Solr core for
1967     *
1968     * @throws CmsConfigurationException if no Solr server is configured
1969     */
1970    @SuppressWarnings("resource")
1971    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
1972
1973        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
1974            // No solr server configured
1975            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
1976        }
1977
1978        if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present.
1979            index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build());
1980        } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present.
1981            // HTTP Server configured
1982            // TODO Implement multi core support for HTTP server
1983            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
1984            index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build());
1985        } else { // Default to the embedded Solr Server
1986
1987            // get the core container that contains one core for each configured index
1988            if (m_coreContainer == null) {
1989                m_coreContainer = createCoreContainer();
1990            }
1991
1992            // unload the existing core if it exists to avoid problems with forced unlock.
1993            if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) {
1994                m_coreContainer.unload(index.getCoreName(), false, false, true);
1995            }
1996            // ensure that all locks on the index are gone
1997            ensureIndexIsUnlocked(index.getPath());
1998
1999            // load the core to the container
2000            File dataDir = new File(index.getPath());
2001            if (!dataDir.exists()) {
2002                dataDir.mkdirs();
2003                if (CmsLog.INIT.isInfoEnabled()) {
2004                    CmsLog.INIT.info(
2005                        Messages.get().getBundle().key(
2006                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2007                            index.getName(),
2008                            index.getPath()));
2009                }
2010            }
2011            File instanceDir = new File(
2012                m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
2013            if (!instanceDir.exists()) {
2014                instanceDir.mkdirs();
2015                if (CmsLog.INIT.isInfoEnabled()) {
2016                    CmsLog.INIT.info(
2017                        Messages.get().getBundle().key(
2018                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2019                            index.getName(),
2020                            index.getPath()));
2021                }
2022            }
2023
2024            // create the core
2025            // TODO: suboptimal - forces always the same schema
2026            SolrCore core = null;
2027            try {
2028                // creation includes registration.
2029                // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
2030                Map<String, String> properties = new HashMap<String, String>(3);
2031                properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
2032                properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
2033                core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false);
2034            } catch (NullPointerException e) {
2035                if (core != null) {
2036                    core.close();
2037                }
2038                throw new CmsConfigurationException(
2039                    Messages.get().container(
2040                        Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
2041                        index.getName() + " (" + index.getCoreName() + ")",
2042                        index.getPath(),
2043                        m_solrConfig.getSolrConfigFile().getAbsolutePath()),
2044                    e);
2045            }
2046
2047            if (index.isNoSolrServerSet()) {
2048                index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
2049            }
2050            if (CmsLog.INIT.isInfoEnabled()) {
2051                CmsLog.INIT.info(
2052                    Messages.get().getBundle().key(
2053                        Messages.INIT_SOLR_SERVER_CREATED_1,
2054                        index.getName() + " (" + index.getCoreName() + ")"));
2055            }
2056        }
2057    }
2058
2059    /**
2060     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
2061     *
2062     * @param fieldConfiguration the field configuration to remove from the configuration
2063     *
2064     * @return true if remove was successful, false if preconditions for removal are ok but the given
2065     *         field configuration was unknown to the manager.
2066     *
2067     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
2068     *         <code>{@link I_CmsSearchIndex}</code>.
2069     *
2070     */
2071    public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration)
2072    throws CmsIllegalStateException {
2073
2074        // never remove the standard field configuration
2075        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
2076            throw new CmsIllegalStateException(
2077                Messages.get().container(
2078                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
2079                    fieldConfiguration.getName()));
2080        }
2081        // validation if removal will be granted
2082        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2083        I_CmsSearchIndex idx;
2084        // the list for collecting indexes that use the given field configuration
2085        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2086        I_CmsSearchFieldConfiguration refFieldConfig;
2087        while (itIndexes.hasNext()) {
2088            idx = itIndexes.next();
2089            refFieldConfig = idx.getFieldConfiguration();
2090            if (refFieldConfig.equals(fieldConfiguration)) {
2091                referrers.add(idx);
2092            }
2093        }
2094        if (referrers.size() > 0) {
2095            throw new CmsIllegalStateException(
2096                Messages.get().container(
2097                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
2098                    fieldConfiguration.getName(),
2099                    referrers.toString()));
2100        }
2101
2102        // remove operation (no exception)
2103        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
2104
2105    }
2106
2107    /**
2108     * Removes a search field from the field configuration.<p>
2109     *
2110     * @param fieldConfiguration the field configuration
2111     * @param field field to remove from the field configuration
2112     *
2113     * @return true if remove was successful, false if preconditions for removal are ok but the given
2114     *         field was unknown.
2115     */
2116    public boolean removeSearchFieldConfigurationField(
2117        I_CmsSearchFieldConfiguration fieldConfiguration,
2118        CmsSearchField field) {
2119
2120        if (LOG.isInfoEnabled()) {
2121            LOG.info(
2122                Messages.get().getBundle().key(
2123                    Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
2124                    field.getName(),
2125                    fieldConfiguration.getName()));
2126        }
2127
2128        return fieldConfiguration.getFields().remove(field);
2129    }
2130
2131    /**
2132     * Removes a search field mapping from the given field.<p>
2133     *
2134     * @param field the field
2135     * @param mapping mapping to remove from the field
2136     *
2137     * @return true if remove was successful, false if preconditions for removal are ok but the given
2138     *         mapping was unknown.
2139     *
2140     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
2141     */
2142    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
2143    throws CmsIllegalStateException {
2144
2145        if (field.getMappings().size() < 2) {
2146            throw new CmsIllegalStateException(
2147                Messages.get().container(
2148                    Messages.ERR_FIELD_MAPPING_DELETE_2,
2149                    mapping.getType().toString(),
2150                    field.getName()));
2151        } else {
2152
2153            if (LOG.isInfoEnabled()) {
2154                LOG.info(
2155                    Messages.get().getBundle().key(
2156                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
2157                        mapping.toString(),
2158                        field.getName()));
2159            }
2160            return field.getMappings().remove(mapping);
2161        }
2162    }
2163
2164    /**
2165     * Removes a search index from the configuration.<p>
2166     *
2167     * @param searchIndex the search index to remove
2168     */
2169    public void removeSearchIndex(I_CmsSearchIndex searchIndex) {
2170
2171        // shut down index to remove potential config files of Solr indexes
2172        searchIndex.shutDown();
2173        if (searchIndex instanceof CmsSolrIndex) {
2174            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
2175            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
2176        }
2177        m_indexes.remove(searchIndex);
2178        initOfflineIndexes();
2179
2180        if (LOG.isInfoEnabled()) {
2181            LOG.info(
2182                Messages.get().getBundle().key(
2183                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
2184                    searchIndex.getName(),
2185                    searchIndex.getProject()));
2186        }
2187    }
2188
2189    /**
2190     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
2191     *
2192     * @param indexNames the names of the index to remove
2193     */
2194    public void removeSearchIndexes(List<String> indexNames) {
2195
2196        Iterator<String> i = indexNames.iterator();
2197        while (i.hasNext()) {
2198            String indexName = i.next();
2199            // get the search index by name
2200            I_CmsSearchIndex index = getIndex(indexName);
2201            if (index != null) {
2202                // remove the index
2203                removeSearchIndex(index);
2204            } else {
2205                if (LOG.isWarnEnabled()) {
2206                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
2207                }
2208            }
2209        }
2210    }
2211
2212    /**
2213     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
2214     *
2215     * @param indexsource the indexsource to remove from the configuration
2216     *
2217     * @return true if remove was successful, false if preconditions for removal are ok but the given
2218     *         searchindex was unknown to the manager.
2219     *
2220     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
2221     *         <code>{@link I_CmsSearchIndex}</code>.
2222     *
2223     */
2224    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2225
2226        // validation if removal will be granted
2227        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2228        I_CmsSearchIndex idx;
2229        // the list for collecting indexes that use the given index source
2230        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2231        // the current list of referred index sources of the iterated index
2232        List<CmsSearchIndexSource> refsources;
2233        while (itIndexes.hasNext()) {
2234            idx = itIndexes.next();
2235            refsources = idx.getSources();
2236            if (refsources != null) {
2237                if (refsources.contains(indexsource)) {
2238                    referrers.add(idx);
2239                }
2240            }
2241        }
2242        if (referrers.size() > 0) {
2243            throw new CmsIllegalStateException(
2244                Messages.get().container(
2245                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2246                    indexsource.getName(),
2247                    referrers.toString()));
2248        }
2249
2250        // remove operation (no exception)
2251        return m_indexSources.remove(indexsource.getName()) != null;
2252
2253    }
2254
2255    /**
2256     * Resumes offline indexing if it was paused.<p>
2257     */
2258    public void resumeOfflineIndexing() {
2259
2260        if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
2261            setOfflineUpdateFrequency(
2262                m_configuredOfflineIndexingFrequency > 0
2263                ? m_configuredOfflineIndexingFrequency
2264                : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2265        }
2266    }
2267
2268    /**
2269     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2270     *
2271     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2272     */
2273    public void setDirectory(String value) {
2274
2275        m_path = value;
2276    }
2277
2278    /**
2279     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2280     *
2281     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2282     */
2283    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2284
2285        m_extractionCacheMaxAge = extractionCacheMaxAge;
2286    }
2287
2288    /**
2289     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2290     *
2291     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2292     */
2293    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2294
2295        try {
2296            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2297        } catch (NumberFormatException e) {
2298            LOG.error(
2299                Messages.get().getBundle().key(
2300                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2301                    extractionCacheMaxAge,
2302                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2303                e);
2304            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2305        }
2306    }
2307
2308    /**
2309     * Sets the unlock mode during indexing.<p>
2310     *
2311     * @param value the value
2312     */
2313    public void setForceunlock(String value) {
2314
2315        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2316    }
2317
2318    /**
2319     * Sets the highlighter.<p>
2320     *
2321     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2322     *
2323     * @param highlighter the package/class name of the highlighter
2324     */
2325    public void setHighlighter(String highlighter) {
2326
2327        try {
2328            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2329        } catch (Exception e) {
2330            m_highlighter = null;
2331            LOG.error(e.getLocalizedMessage(), e);
2332        }
2333    }
2334
2335    /**
2336     * Sets the seconds to wait for an index lock during an update operation.<p>
2337     *
2338     * @param value the seconds to wait for an index lock during an update operation
2339     */
2340    public void setIndexLockMaxWaitSeconds(int value) {
2341
2342        m_indexLockMaxWaitSeconds = value;
2343    }
2344
2345    /**
2346     * Sets the max. excerpt length.<p>
2347     *
2348     * @param maxExcerptLength the max. excerpt length to set
2349     */
2350    public void setMaxExcerptLength(int maxExcerptLength) {
2351
2352        m_maxExcerptLength = maxExcerptLength;
2353    }
2354
2355    /**
2356     * Sets the max. excerpt length as a String.<p>
2357     *
2358     * @param maxExcerptLength the max. excerpt length to set
2359     */
2360    public void setMaxExcerptLength(String maxExcerptLength) {
2361
2362        try {
2363            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2364        } catch (Exception e) {
2365            LOG.error(
2366                Messages.get().getBundle().key(
2367                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2368                    maxExcerptLength,
2369                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2370                e);
2371            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2372        }
2373    }
2374
2375    /**
2376     * Sets the maximal wait time for offline index updates after edit operations.<p>
2377     *
2378     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2379     */
2380    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2381
2382        m_maxIndexWaitTime = maxIndexWaitTime;
2383    }
2384
2385    /**
2386     * Sets the maximal wait time for offline index updates after edit operations.<p>
2387     *
2388     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2389     */
2390    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2391
2392        try {
2393            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2394        } catch (Exception e) {
2395            LOG.error(
2396                Messages.get().getBundle().key(
2397                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2398                    maxIndexWaitTime,
2399                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2400                e);
2401            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2402        }
2403    }
2404
2405    /**
2406     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2407     *
2408     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2409     */
2410    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2411
2412        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2413    }
2414
2415    /**
2416     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2417     *
2418     * @param value the maximum number of modifications to set
2419     */
2420    public void setMaxModificationsBeforeCommit(String value) {
2421
2422        try {
2423            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2424        } catch (Exception e) {
2425            LOG.error(
2426                Messages.get().getBundle().key(
2427                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2428                    value,
2429                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2430                e);
2431            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2432        }
2433    }
2434
2435    /**
2436     * Sets the update frequency of the offline indexer in milliseconds.<p>
2437     *
2438     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2439     */
2440    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2441
2442        m_offlineUpdateFrequency = offlineUpdateFrequency;
2443        updateOfflineIndexes(0);
2444    }
2445
2446    /**
2447     * Sets the update frequency of the offline indexer in milliseconds.<p>
2448     *
2449     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2450     */
2451    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2452
2453        try {
2454            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2455        } catch (Exception e) {
2456            LOG.error(
2457                Messages.get().getBundle().key(
2458                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2459                    offlineUpdateFrequency,
2460                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2461                e);
2462            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2463        }
2464    }
2465
2466    /**
2467     * Sets the Solr configuration.<p>
2468     *
2469     * @param config the Solr configuration
2470     */
2471    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2472
2473        m_solrConfig = config;
2474    }
2475
2476    /**
2477     * Sets the timeout to abandon threads indexing a resource.<p>
2478     *
2479     * @param value the timeout in milliseconds
2480     */
2481    public void setTimeout(long value) {
2482
2483        m_timeout = value;
2484    }
2485
2486    /**
2487     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2488     *
2489     * @param value the timeout in milliseconds
2490     */
2491    public void setTimeout(String value) {
2492
2493        try {
2494            setTimeout(Long.parseLong(value));
2495        } catch (Exception e) {
2496            LOG.error(
2497                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2498                e);
2499            setTimeout(DEFAULT_TIMEOUT);
2500        }
2501    }
2502
2503    /**
2504     * Shuts down the search manager.<p>
2505     *
2506     * This will cause all search indices to be shut down.<p>
2507     */
2508    public void shutDown() {
2509
2510        if (m_offlineIndexThread != null) {
2511            m_offlineIndexThread.shutDown();
2512        }
2513
2514        if (m_offlineHandler != null) {
2515            OpenCms.removeCmsEventListener(m_offlineHandler);
2516        }
2517
2518        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2519        while (i.hasNext()) {
2520            I_CmsSearchIndex index = i.next();
2521            index.shutDown();
2522            index = null;
2523        }
2524        m_indexes.clear();
2525
2526        shutDownSolrContainer();
2527
2528        if (CmsLog.INIT.isInfoEnabled()) {
2529            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2530        }
2531    }
2532
2533    /**
2534     * Updates all offline indexes.<p>
2535     *
2536     * Can be used to force an index update when it's not convenient to wait until the
2537     * offline update interval has eclipsed.<p>
2538     *
2539     * Since the offline indexes still need some time to update the new resources,
2540     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2541     * to ensure that updating is finished.
2542     *
2543     * @see #updateOfflineIndexes(long)
2544     *
2545     */
2546    public void updateOfflineIndexes() {
2547
2548        updateOfflineIndexes(getMaxIndexWaitTime());
2549    }
2550
2551    /**
2552     * Updates all offline indexes.<p>
2553     *
2554     * Can be used to force an index update when it's not convenient to wait until the
2555     * offline update interval has eclipsed.<p>
2556     *
2557     * Since the offline index will still need some time to update the new resources even if it runs directly,
2558     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2559     *
2560     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2561     */
2562    public void updateOfflineIndexes(long waitTime) {
2563
2564        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2565            // notify existing thread of update frequency change
2566            if (LOG.isDebugEnabled()) {
2567                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2568            }
2569            m_offlineIndexThread.interrupt();
2570            if (waitTime > 0) {
2571                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2572            }
2573        }
2574    }
2575
2576    /**
2577     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2578     * We take transitive dependencies into account and handle cyclic dependencies correctly as well.
2579     *
2580     * @param adminCms an OpenCms user context with Admin permissions
2581     * @param updateResources the resources to be re-indexed
2582     *
2583     * @return the updated list of resource to re-index
2584     */
2585    protected List<CmsPublishedResource> addAdditionallyAffectedResources(
2586        CmsObject adminCms,
2587        List<CmsPublishedResource> updateResources) {
2588
2589        Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources);
2590        Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet;
2591        Collection<CmsPublishedResource> additionalResources = Collections.emptySet();
2592        do {
2593            additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck);
2594            additionalResources.addAll(addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck));
2595            updateResources.addAll(additionalResources);
2596            updateResourceSet.addAll(additionalResources);
2597            resourcesToCheck = additionalResources;
2598        } while (resourcesToCheck.size() > 0);
2599        return updateResources;
2600    }
2601
2602    /**
2603     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2604     *
2605     * @param adminCms an OpenCms user context with Admin permissions
2606     * @param updateResources the resources to be re-indexed
2607     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2608     *
2609     * @return the list of resources that need to be additionally re-index
2610     */
2611    protected Collection<CmsPublishedResource> addIndexContentRelatedResources(
2612        CmsObject adminCms,
2613        Collection<CmsPublishedResource> updateResources,
2614        Collection<CmsPublishedResource> updateResourcesToCheck) {
2615
2616        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2617        for (CmsPublishedResource checkedRes : updateResourcesToCheck) {
2618            try {
2619                CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId());
2620                filter = filter.filterType(CmsRelationType.INDEX_CONTENT);
2621                List<CmsRelation> relations = adminCms.readRelations(filter);
2622                for (CmsRelation relation : relations) {
2623                    CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2624                    CmsPublishedResource additionalPubRes = new CmsPublishedResource(res);
2625                    if (!updateResources.contains(additionalPubRes)) {
2626                        additionalUpdateResources.add(additionalPubRes);
2627                    }
2628                }
2629            } catch (CmsException e) {
2630                LOG.error(e.getLocalizedMessage(), e);
2631            }
2632        }
2633        return additionalUpdateResources;
2634    }
2635
2636    /**
2637     * Cleans up the extraction result cache.<p>
2638     */
2639    protected void cleanExtractionCache() {
2640
2641        // clean up the extraction result cache
2642        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
2643    }
2644
    /**
     * Collects the container pages related to the resources that have been published.<p>
     *
     * For every checked resource of an XML content type, the sources of the relations pointing to it
     * are read. Container pages among these sources are collected directly, group containers
     * are collected and resolved to the container pages referring to them in a second step.
     * For detail only container pages and for checked resources that are container pages themselves,
     * {@link #addDetailContent} is called to extend the collected pages.<p>
     *
     * Nothing is collected if the container page type is not available (e.g. during setup).<p>
     *
     * @param adminCms an OpenCms user context with Admin permissions
     * @param updateResources the resources to be re-indexed
     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
     *
     * @return the resources that need to be re-indexed additionally
     */
    protected Collection<CmsPublishedResource> findRelatedContainerPages(
        CmsObject adminCms,
        Collection<CmsPublishedResource> updateResources,
        Collection<CmsPublishedResource> updateResourcesToCheck) {

        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();

        // group containers that refer to a checked resource, resolved to container pages further below
        Set<CmsResource> elementGroups = new HashSet<CmsResource>();
        // all container pages found, converted to published resources at the end
        Set<CmsResource> containerPages = new HashSet<CmsResource>();
        int containerPageTypeId = -1;
        try {
            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
        } catch (CmsLoaderException e) {
            // will happen during setup, when container page type is not available yet
            LOG.info(e.getLocalizedMessage(), e);
        }
        if (containerPageTypeId != -1) {
            for (CmsPublishedResource pubRes : updateResourcesToCheck) {
                try {
                    // only XML contents are checked for relations pointing to them
                    if (OpenCms.getResourceManager().getResourceType(
                        pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
                        CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
                        // NOTE(review): the value returned by filterStrong() is discarded here, while
                        // addIndexContentRelatedResources uses the filter returned by filterType(..).
                        // If the filter methods return a modified copy, the restriction to strong
                        // relations is not applied - TODO confirm against CmsRelationFilter
                        filter.filterStrong();
                        List<CmsRelation> relations = adminCms.readRelations(filter);
                        for (CmsRelation relation : relations) {
                            CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
                            if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
                                containerPages.add(res);
                                if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
                                    adminCms,
                                    adminCms.getSitePath(res))) {
                                    addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
                                }
                            } else
                                // sources of the group container type are remembered for the second pass
                                if (OpenCms.getResourceManager().getResourceType(res.getTypeId()).getTypeName().equals(
                                    CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) {
                                        elementGroups.add(res);
                                    }
                        }
                    }
                    // the checked resource is a container page itself
                    if (containerPageTypeId == pubRes.getType()) {
                        addDetailContent(
                            adminCms,
                            containerPages,
                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
                    }
                } catch (CmsException e) {
                    // the failing resource is skipped, the remaining resources are still checked
                    LOG.error(e.getLocalizedMessage(), e);
                }
            }
            // second pass: find the container pages that refer to the collected group containers
            for (CmsResource pubRes : elementGroups) {
                try {
                    CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
                    // NOTE(review): return value of filterStrong() is discarded here as well - TODO confirm
                    filter.filterStrong();
                    List<CmsRelation> relations = adminCms.readRelations(filter);
                    for (CmsRelation relation : relations) {
                        CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
                        if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
                            containerPages.add(res);
                            if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
                                adminCms,
                                adminCms.getSitePath(res))) {
                                addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
                            }
                        }
                    }
                } catch (CmsException e) {
                    LOG.error(e.getLocalizedMessage(), e);
                }
            }
            // add all found container pages as published resource objects to the list
            for (CmsResource page : containerPages) {
                CmsPublishedResource pubCont = new CmsPublishedResource(page);
                if (!updateResources.contains(pubCont)) {
                    // ensure container page is added only once
                    additionalUpdateResources.add(pubCont);
                }
            }
        }
        return additionalUpdateResources;
    }
2735
2736    /**
2737     * Returns the set of names of all configured document types.<p>
2738     *
2739     * @return the set of names of all configured document types
2740     */
2741    protected List<String> getDocumentTypes() {
2742
2743        return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet()));
2744    }
2745
2746    /**
2747     * Returns the a offline project used for offline indexing.<p>
2748     *
2749     * @return the offline project if available
2750     */
2751    protected CmsProject getOfflineIndexProject() {
2752
2753        CmsProject result = null;
2754        for (I_CmsSearchIndex index : m_offlineIndexes) {
2755            try {
2756                result = m_adminCms.readProject(index.getProject());
2757
2758                if (!result.isOnlineProject()) {
2759                    break;
2760                }
2761            } catch (Exception e) {
2762                // may be a missconfigured index, ignore
2763                LOG.error(e.getLocalizedMessage(), e);
2764            }
2765        }
2766        return result;
2767    }
2768
2769    /**
2770     * Returns a new thread manager for the indexing threads.<p>
2771     *
2772     * @return a new thread manager for the indexing threads
2773     */
2774    protected CmsIndexingThreadManager getThreadManager() {
2775
2776        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
2777    }
2778
2779    /**
2780     * Initializes the available Cms resource types to be indexed.<p>
2781     *
2782     * A map stores document factories keyed by a string representing
2783     * a colon separated list of Cms resource types and/or mimetypes.<p>
2784     *
2785     * The keys of this map are used to trigger a document factory to convert
2786     * a Cms resource into a Lucene index document.<p>
2787     *
2788     * A document factory is a class implementing the interface
2789     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2790     */
2791    protected void initAvailableDocumentTypes() {
2792
2793        CmsSearchDocumentType documenttype = null;
2794        String className = null;
2795        String name = null;
2796        I_CmsDocumentFactory documentFactory = null;
2797        List<String> resourceTypes = null;
2798        List<String> mimeTypes = null;
2799        Class<?> c = null;
2800
2801        m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>();
2802
2803        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2804
2805            documenttype = m_documentTypeConfigs.get(i);
2806            name = documenttype.getName();
2807
2808            try {
2809                className = documenttype.getClassName();
2810                resourceTypes = documenttype.getResourceTypes();
2811                mimeTypes = documenttype.getMimeTypes();
2812
2813                if (name == null) {
2814                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2815                }
2816                if (className == null) {
2817                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2818                }
2819                if (resourceTypes.size() == 0) {
2820                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2821                }
2822
2823                try {
2824                    c = Class.forName(className);
2825                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2826                        new Object[] {name});
2827                } catch (ClassNotFoundException exc) {
2828                    throw new CmsIndexException(
2829                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2830                        exc);
2831                } catch (Exception exc) {
2832                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2833                }
2834
2835                if (documentFactory.isUsingCache()) {
2836                    // init cache if used by the factory
2837                    documentFactory.setCache(m_extractionResultCache);
2838                }
2839
2840                Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>();
2841                for (Iterator<String> keyIt = documentFactory.getDocumentKeys(
2842                    resourceTypes,
2843                    mimeTypes).iterator(); keyIt.hasNext();) {
2844                    String key = keyIt.next();
2845                    matchingTypes.put(key, documentFactory);
2846                    m_extractionKeys.add(key);
2847                }
2848                m_documentTypes.put(name, matchingTypes);
2849
2850            } catch (CmsException e) {
2851                if (LOG.isWarnEnabled()) {
2852                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2853                }
2854            }
2855        }
2856    }
2857
2858    /**
2859     * Initializes the index sources.
2860     */
2861    protected void initIndexSources() {
2862
2863        for (CmsSearchIndexSource source : m_indexSources.values()) {
2864            source.init();
2865        }
2866    }
2867
2868    /**
2869     * Initializes the configured search indexes.<p>
2870     *
2871     * This initializes also the list of Cms resources types
2872     * to be indexed by an index source.<p>
2873     */
2874    protected void initSearchIndexes() {
2875
2876        I_CmsSearchIndex index = null;
2877        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2878            index = m_indexes.get(i);
2879            // reset disabled flag
2880            index.setEnabled(true);
2881            // check if the index has been configured correctly
2882            if (index.checkConfiguration(m_adminCms)) {
2883                // the index is configured correctly
2884                try {
2885                    index.initialize();
2886                } catch (Exception e) {
2887                    if (CmsLog.INIT.isWarnEnabled()) {
2888                        // in this case the index will be disabled
2889                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2890                    }
2891                }
2892            }
2893            // output a log message if the index was successfully configured or not
2894            if (CmsLog.INIT.isInfoEnabled()) {
2895                if (index.isEnabled()) {
2896                    CmsLog.INIT.info(
2897                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2898                } else {
2899                    CmsLog.INIT.warn(
2900                        Messages.get().getBundle().key(
2901                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2902                            index,
2903                            index.getProject()));
2904                }
2905            }
2906        }
2907    }
2908
2909    /**
2910     * Checks, if the index should be rebuilt/updated at all by the search manager.
2911     * @param index the index to check.
2912     * @return a flag, indicating if the index should be rebuilt/updated at all.
2913     */
2914    protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) {
2915
2916        if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) {
2917            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName()));
2918            return false;
2919        } else {
2920            return true;
2921        }
2922
2923    }
2924
2925    /**
2926     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2927     * after resources have been published.<p>
2928     *
2929     * @param adminCms an OpenCms user context with Admin permissions
2930     * @param publishHistoryId the history ID of the published project
2931     * @param report the report to write the output to
2932     */
2933    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2934
2935        int oldPriority = Thread.currentThread().getPriority();
2936        try {
2937            SEARCH_MANAGER_LOCK.lock();
2938            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2939            List<CmsPublishedResource> publishedResources;
2940            try {
2941                // read the list of all published resources
2942                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2943            } catch (CmsException e) {
2944                LOG.error(
2945                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2946                    e);
2947                return;
2948            }
2949            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2950            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2951
2952            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2953            for (CmsPublishedResource res : publishedResources) {
2954                if (res.isFolder() || res.getState().isUnchanged()) {
2955                    // folders and unchanged resources don't need to be indexed after publish
2956                    continue;
2957                }
2958                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2959                    if (updateResources.contains(res)) {
2960                        // resource may have been added as a sibling of another resource
2961                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2962                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
2963                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
2964                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
2965                        // check it this is a moved resource with source / target info, in this case we need both entries
2966                        if (!hasMoved) {
2967                            // if the resource was moved, we must contain both entries
2968                            updateResources.remove(res);
2969                        }
2970                        // "equals()" implementation of published resource checks for id,
2971                        // so the removed value may have a different "deleted" or "modified" status value
2972                        updateResources.add(res);
2973                    } else {
2974                        // resource not yet contained in the list
2975                        updateResources.add(res);
2976                        // check for the siblings (not for deleted resources, these are already gone)
2977                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
2978                            // this resource has siblings
2979                            try {
2980                                // read siblings from the online project
2981                                List<CmsResource> siblings = adminCms.readSiblings(
2982                                    res.getRootPath(),
2983                                    CmsResourceFilter.ALL);
2984                                Iterator<CmsResource> itSib = siblings.iterator();
2985                                while (itSib.hasNext()) {
2986                                    // check all siblings
2987                                    CmsResource sibling = itSib.next();
2988                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
2989                                    if (!updateResources.contains(sib)) {
2990                                        // ensure sibling is added only once
2991                                        updateResources.add(sib);
2992                                    }
2993                                }
2994                            } catch (CmsException e) {
2995                                // ignore, just use the original resource
2996                                if (LOG.isWarnEnabled()) {
2997                                    LOG.warn(
2998                                        Messages.get().getBundle().key(
2999                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
3000                                            res.getRootPath()),
3001                                        e);
3002                                }
3003                            }
3004                        }
3005                    }
3006                }
3007            }
3008
3009            addAdditionallyAffectedResources(adminCms, updateResources);
3010            updateAllIndexes(adminCms, updateResources, report);
3011        } finally {
3012            SEARCH_MANAGER_LOCK.unlock();
3013            Thread.currentThread().setPriority(oldPriority);
3014        }
3015    }
3016
3017    /**
3018     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p>
3019     *
3020     * @param adminCms an OpenCms user context with Admin permissions
3021     * @param updateResources the resources to update
3022     * @param report the report to write the output to
3023     */
3024    protected void updateAllIndexes(
3025        CmsObject adminCms,
3026        List<CmsPublishedResource> updateResources,
3027        I_CmsReport report) {
3028
3029        try {
3030            SEARCH_MANAGER_LOCK.lock();
3031            if (!updateResources.isEmpty()) {
3032                // sort the resource to update
3033                Collections.sort(updateResources);
3034                // only update the indexes if the list of remaining published resources is not empty
3035                Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
3036                while (i.hasNext()) {
3037                    I_CmsSearchIndex index = i.next();
3038                    if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
3039                        // only update indexes which have the rebuild mode set to "auto"
3040                        try {
3041                            updateIndex(index, report, updateResources);
3042                        } catch (CmsException e) {
3043                            LOG.error(
3044                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
3045                                e);
3046                        }
3047                    }
3048                }
3049            }
3050            // clean up the extraction result cache
3051            cleanExtractionCache();
3052        } finally {
3053            SEARCH_MANAGER_LOCK.unlock();
3054        }
3055
3056    }
3057
3058    /**
3059     * Updates (if required creates) the index with the given name.<p>
3060     *
3061     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
3062     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
3063     * the index will be fully rebuild.<p>
3064     *
3065     * @param index the index to update or rebuild
3066     * @param report the report to write output messages to
3067     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3068     *
3069     * @throws CmsException if something goes wrong
3070     */
3071    protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
3072    throws CmsException {
3073
3074        if (shouldUpdateAtAll(index)) {
3075            try {
3076                SEARCH_MANAGER_LOCK.lock();
3077
3078                // copy the stored admin context for the indexing
3079                CmsObject cms = OpenCms.initCmsObject(m_adminCms);
3080                // make sure a report is available
3081                if (report == null) {
3082                    report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
3083                }
3084
3085                // check if the index has been configured correctly
3086                if (!index.checkConfiguration(cms)) {
3087                    // the index is disabled
3088                    return;
3089                }
3090
3091                // set site root and project for this index
3092                cms.getRequestContext().setSiteRoot("/");
3093                // switch to the index project
3094                cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3095
3096                if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
3097                    // rebuild the complete index
3098
3099                    updateIndexCompletely(cms, index, report);
3100                } else {
3101                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3102                }
3103            } finally {
3104                SEARCH_MANAGER_LOCK.unlock();
3105            }
3106        }
3107    }
3108
3109    /**
3110     * The method updates all OpenCms documents that are indexed.
3111     * @param cms the OpenCms user context to use for accessing the VFS
3112     * @param index the index to update
3113     * @param report the report to write output messages to
3114     * @throws CmsIndexException thrown if indexing fails for some reason
3115     */
3116    @SuppressWarnings("null")
3117    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
3118    throws CmsIndexException {
3119
3120        // create a new thread manager for the indexing threads
3121        CmsIndexingThreadManager threadManager = getThreadManager();
3122
3123        boolean isOfflineIndex = false;
3124        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
3125            // disable offline indexing while the complete index is rebuild
3126            isOfflineIndex = true;
3127            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
3128            // re-initialize the offline indexes, this will disable this offline index
3129            initOfflineIndexes();
3130        }
3131
3132        I_CmsIndexWriter writer = null;
3133        try {
3134            // create a backup of the existing index
3135            CmsSearchIndex indexInternal = null;
3136            String backup = null;
3137            if (index instanceof CmsSearchIndex) {
3138                indexInternal = (CmsSearchIndex)index;
3139                backup = indexInternal.createIndexBackup();
3140                if (backup != null) {
3141                    indexInternal.indexSearcherOpen(backup);
3142                }
3143            }
3144
3145            // create a new index writer
3146            writer = index.getIndexWriter(report, true);
3147            if (writer instanceof I_CmsSolrIndexWriter) {
3148                try {
3149                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
3150                } catch (IOException e) {
3151                    LOG.error(e.getMessage(), e);
3152                }
3153            }
3154
3155            // output start information on the report
3156            report.println(
3157                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
3158                I_CmsReport.FORMAT_HEADLINE);
3159
3160            // iterate all configured index sources of this index
3161            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3162            while (sources.hasNext()) {
3163                // get the next index source
3164                CmsSearchIndexSource source = sources.next();
3165                // create the indexer
3166                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3167                // new index creation, use all resources from the index source
3168                indexer.rebuildIndex(writer, threadManager, source);
3169
3170                // wait for indexing threads to finish
3171                while (threadManager.isRunning()) {
3172                    try {
3173                        Thread.sleep(500);
3174                    } catch (InterruptedException e) {
3175                        // just continue with the loop after interruption
3176                        LOG.info(e.getLocalizedMessage(), e);
3177                    }
3178                }
3179
3180                // commit and optimize the index after each index source has been finished
3181                try {
3182                    writer.commit();
3183                } catch (IOException e) {
3184                    if (LOG.isWarnEnabled()) {
3185                        LOG.warn(
3186                            Messages.get().getBundle().key(
3187                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3188                                index.getName(),
3189                                index.getPath()),
3190                            e);
3191                    }
3192                }
3193                try {
3194                    writer.optimize();
3195                } catch (IOException e) {
3196                    if (LOG.isWarnEnabled()) {
3197                        LOG.warn(
3198                            Messages.get().getBundle().key(
3199                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
3200                                index.getName(),
3201                                index.getPath()),
3202                            e);
3203                    }
3204                }
3205            }
3206
3207            // we are sure here that indexInternal is not null
3208            if (backup != null) {
3209                // remove the backup after the files have been re-indexed
3210                indexInternal.indexSearcherClose();
3211                indexInternal.removeIndexBackup(backup);
3212            }
3213
3214            // output finish information on the report
3215            report.println(
3216                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
3217                I_CmsReport.FORMAT_HEADLINE);
3218
3219        } finally {
3220            if (writer != null) {
3221                try {
3222                    writer.close();
3223                } catch (IOException e) {
3224                    if (LOG.isWarnEnabled()) {
3225                        LOG.warn(
3226                            Messages.get().getBundle().key(
3227                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
3228                                index.getPath(),
3229                                index.getName()),
3230                            e);
3231                    }
3232                }
3233            }
3234            if (isOfflineIndex) {
3235                // reset the mode of the offline index
3236                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
3237                // re-initialize the offline indexes, this will re-enable this index
3238                initOfflineIndexes();
3239            }
3240            // index has changed - initialize the index searcher instance
3241            index.onIndexChanged(true);
3242        }
3243
3244        // show information about indexing runtime
3245        threadManager.reportStatistics(report);
3246    }
3247
3248    /**
3249     * Incrementally updates the given index.<p>
3250     *
3251     * @param cms the OpenCms user context to use for accessing the VFS
3252     * @param index the index to update
3253     * @param report the report to write output messages to
3254     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3255     *
3256     * @throws CmsException if something goes wrong
3257     */
3258    protected void updateIndexIncremental(
3259        CmsObject cms,
3260        I_CmsSearchIndex index,
3261        I_CmsReport report,
3262        List<CmsPublishedResource> resourcesToIndex)
3263    throws CmsException {
3264
3265        try {
3266            SEARCH_MANAGER_LOCK.lock();
3267
3268            // update the existing index
3269            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
3270
3271            boolean hasResourcesToDelete = false;
3272            boolean hasResourcesToUpdate = false;
3273
3274            // iterate all configured index sources of this index
3275            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3276            while (sources.hasNext()) {
3277                // get the next index source
3278                CmsSearchIndexSource source = sources.next();
3279                // create the indexer
3280                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3281                // collect the resources to update
3282                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
3283                if (!updateData.isEmpty()) {
3284                    // add the update collection to the internal pipeline
3285                    updateCollections.add(updateData);
3286                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
3287                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
3288                }
3289            }
3290
3291            // only start index modification if required
3292            if (hasResourcesToDelete || hasResourcesToUpdate) {
3293                // output start information on the report
3294                report.println(
3295                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
3296                    I_CmsReport.FORMAT_HEADLINE);
3297
3298                I_CmsIndexWriter writer = null;
3299                try {
3300                    // obtain an index writer that updates the current index
3301                    writer = index.getIndexWriter(report, false);
3302
3303                    if (hasResourcesToDelete) {
3304                        // delete the resource from the index
3305                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3306                        while (i.hasNext()) {
3307                            CmsSearchIndexUpdateData updateCollection = i.next();
3308                            if (updateCollection.hasResourcesToDelete()) {
3309                                updateCollection.getIndexer().deleteResources(
3310                                    writer,
3311                                    updateCollection.getResourcesToDelete());
3312                            }
3313                        }
3314                    }
3315
3316                    if (hasResourcesToUpdate) {
3317                        // create a new thread manager
3318                        CmsIndexingThreadManager threadManager = getThreadManager();
3319
3320                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3321                        while (i.hasNext()) {
3322                            CmsSearchIndexUpdateData updateCollection = i.next();
3323                            if (updateCollection.hasResourceToUpdate()) {
3324                                updateCollection.getIndexer().updateResources(
3325                                    writer,
3326                                    threadManager,
3327                                    updateCollection.getResourcesToUpdate());
3328                            }
3329                        }
3330
3331                        // wait for indexing threads to finish
3332                        while (threadManager.isRunning()) {
3333                            try {
3334                                Thread.sleep(500);
3335                            } catch (InterruptedException e) {
3336                                // just continue with the loop after interruption
3337                                LOG.info(e.getLocalizedMessage(), e);
3338                            }
3339                        }
3340                    }
3341                } finally {
3342                    // close the index writer
3343                    if (writer != null) {
3344                        try {
3345                            writer.commit();
3346                        } catch (IOException e) {
3347                            LOG.error(
3348                                Messages.get().getBundle().key(
3349                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3350                                    index.getName(),
3351                                    index.getPath()),
3352                                e);
3353                        }
3354                    }
3355                    // index has changed - initialize the index searcher instance
3356                    index.onIndexChanged(false);
3357                }
3358
3359                // output finish information on the report
3360                report.println(
3361                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3362                    I_CmsReport.FORMAT_HEADLINE);
3363            }
3364        } finally {
3365            SEARCH_MANAGER_LOCK.unlock();
3366        }
3367    }
3368
3369    /**
3370     * Updates the offline search indexes for the given list of resources.<p>
3371     *
3372     * @param report the report to write the index information to
3373     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3374     */
3375    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3376
3377        CmsObject cms = m_adminCms;
3378        try {
3379            // copy the administration context for the indexing
3380            cms = OpenCms.initCmsObject(m_adminCms);
3381            // set site root and project for this index
3382            cms.getRequestContext().setSiteRoot("/");
3383        } catch (CmsException e) {
3384            LOG.error(e.getLocalizedMessage(), e);
3385        }
3386
3387        Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator();
3388        while (j.hasNext()) {
3389            I_CmsSearchIndex index = j.next();
3390            if (index.getSources() != null) {
3391                try {
3392                    // switch to the index project
3393                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3394                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3395                } catch (CmsException e) {
3396                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3397                }
3398            }
3399        }
3400    }
3401
3402    /**
3403     * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p>
3404     *
3405     * @param adminCms the cms context
3406     * @param containerPages the containerpages
3407     * @param containerPage the container page site path
3408     */
3409    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3410
3411        if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) {
3412
3413            try {
3414                CmsResource detailRes = adminCms.readResource(
3415                    CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage),
3416                    CmsResourceFilter.IGNORE_EXPIRATION);
3417                containerPages.add(detailRes);
3418            } catch (Throwable e) {
3419                if (LOG.isWarnEnabled()) {
3420                    LOG.warn(e.getLocalizedMessage(), e);
3421                }
3422            }
3423        }
3424    }
3425
3426    /**
3427     * Creates the Solr core container.<p>
3428     *
3429     * @return the created core container
3430     */
3431    private CoreContainer createCoreContainer() {
3432
3433        CoreContainer container = null;
3434        try {
3435            // get the core container
3436            // still no core container: create it
3437            container = CoreContainer.createAndLoad(
3438                Paths.get(m_solrConfig.getHome()),
3439                m_solrConfig.getSolrFile().toPath());
3440            if (CmsLog.INIT.isInfoEnabled()) {
3441                CmsLog.INIT.info(
3442                    Messages.get().getBundle().key(
3443                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3444                        m_solrConfig.getHome(),
3445                        m_solrConfig.getSolrFile().getName()));
3446            }
3447        } catch (Exception e) {
3448            LOG.error(
3449                Messages.get().getBundle().key(
3450                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3451                    m_solrConfig.getSolrFile().getAbsolutePath()),
3452                e);
3453        }
3454        return container;
3455
3456    }
3457
3458    /**
3459     * Remove write.lock file in the data directory to ensure the index is unlocked.
3460     * @param dataDir the data directory of the Solr index that should be unlocked.
3461     */
3462    private void ensureIndexIsUnlocked(String dataDir) {
3463
3464        Collection<File> lockFiles = new ArrayList<File>(2);
3465        lockFiles.add(
3466            new File(
3467                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock"));
3468        lockFiles.add(
3469            new File(
3470                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck")
3471                    + "write.lock"));
3472        for (File lockFile : lockFiles) {
3473            if (lockFile.exists()) {
3474                lockFile.delete();
3475                LOG.warn(
3476                    "Forcely unlocking index with data dir \""
3477                        + dataDir
3478                        + "\" by removing file \""
3479                        + lockFile.getAbsolutePath()
3480                        + "\".");
3481            }
3482        }
3483    }
3484
3485    /**
3486     * Returns the report in the given event data, if <code>null</code>
3487     * a new log report is used.<p>
3488     *
3489     * @param event the event to get the report for
3490     *
3491     * @return the report
3492     */
3493    private I_CmsReport getEventReport(CmsEvent event) {
3494
3495        I_CmsReport report = null;
3496        if (event.getData() != null) {
3497            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3498        }
3499        if (report == null) {
3500            report = new CmsLogReport(Locale.ENGLISH, getClass());
3501        }
3502        return report;
3503    }
3504
3505    /**
3506     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3507     *
3508     * @param publishedResources a list of published resources
3509     *
3510     * @return the set of structure ids that satisfy the condition above
3511     */
3512    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3513        List<CmsPublishedResource> publishedResources) {
3514
3515        Set<CmsUUID> result = new HashSet<CmsUUID>();
3516        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3517        for (CmsPublishedResource pubRes : publishedResources) {
3518            if (pubRes.getState().isNew()) {
3519                result.add(pubRes.getStructureId());
3520            }
3521            if (pubRes.getState().isDeleted()) {
3522                deletedSet.add(pubRes.getStructureId());
3523            }
3524        }
3525        result.retainAll(deletedSet);
3526        return result;
3527    }
3528
3529    /**
3530     * Shuts down the Solr core container.<p>
3531     */
3532    private void shutDownSolrContainer() {
3533
3534        if (m_coreContainer != null) {
3535            for (SolrCore core : m_coreContainer.getCores()) {
3536                // do not unload spellcheck core because otherwise the core.properties file is removed
3537                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3538                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3539                    m_coreContainer.unload(core.getName(), false, false, true);
3540                }
3541            }
3542            m_coreContainer.shutdown();
3543            if (CmsLog.INIT.isInfoEnabled()) {
3544                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3545            }
3546            m_coreContainer = null;
3547        }
3548    }
3549
3550}