001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.configuration.CmsConfigurationException;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishedResource;
033import org.opencms.db.CmsResourceState;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsProject;
036import org.opencms.file.CmsResource;
037import org.opencms.file.CmsResourceFilter;
038import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
039import org.opencms.file.types.CmsResourceTypeXmlContent;
040import org.opencms.i18n.CmsMessageContainer;
041import org.opencms.jsp.CmsJspTagContainer;
042import org.opencms.loader.CmsLoaderException;
043import org.opencms.main.CmsEvent;
044import org.opencms.main.CmsException;
045import org.opencms.main.CmsIllegalArgumentException;
046import org.opencms.main.CmsIllegalStateException;
047import org.opencms.main.CmsLog;
048import org.opencms.main.I_CmsEventListener;
049import org.opencms.main.OpenCms;
050import org.opencms.main.OpenCmsSolrHandler;
051import org.opencms.relations.CmsRelation;
052import org.opencms.relations.CmsRelationFilter;
053import org.opencms.report.CmsLogReport;
054import org.opencms.report.I_CmsReport;
055import org.opencms.scheduler.I_CmsScheduledJob;
056import org.opencms.search.documents.A_CmsVfsDocument;
057import org.opencms.search.documents.CmsExtractionResultCache;
058import org.opencms.search.documents.I_CmsDocumentFactory;
059import org.opencms.search.documents.I_CmsTermHighlighter;
060import org.opencms.search.fields.CmsLuceneField;
061import org.opencms.search.fields.CmsLuceneFieldConfiguration;
062import org.opencms.search.fields.CmsSearchField;
063import org.opencms.search.fields.CmsSearchFieldConfiguration;
064import org.opencms.search.fields.CmsSearchFieldMapping;
065import org.opencms.search.solr.CmsSolrConfiguration;
066import org.opencms.search.solr.CmsSolrFieldConfiguration;
067import org.opencms.search.solr.CmsSolrIndex;
068import org.opencms.search.solr.CmsSolrIndexWriter;
069import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
070import org.opencms.security.CmsRole;
071import org.opencms.security.CmsRoleViolationException;
072import org.opencms.util.A_CmsModeStringEnumeration;
073import org.opencms.util.CmsStringUtil;
074import org.opencms.util.CmsUUID;
075import org.opencms.util.CmsWaitHandle;
076
077import java.io.File;
078import java.io.IOException;
079import java.nio.file.FileSystems;
080import java.nio.file.Paths;
081import java.util.ArrayList;
082import java.util.Collections;
083import java.util.HashMap;
084import java.util.HashSet;
085import java.util.Iterator;
086import java.util.List;
087import java.util.Locale;
088import java.util.Map;
089import java.util.Set;
090import java.util.TreeMap;
091import java.util.concurrent.locks.ReentrantLock;
092
093import org.apache.commons.logging.Log;
094import org.apache.lucene.analysis.Analyzer;
095import org.apache.lucene.analysis.standard.StandardAnalyzer;
096import org.apache.lucene.analysis.util.CharArraySet;
097import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
098import org.apache.solr.client.solrj.impl.HttpSolrClient;
099import org.apache.solr.core.CoreContainer;
100import org.apache.solr.core.CoreDescriptor;
101import org.apache.solr.core.SolrCore;
102
103/**
104 * Implements the general management and configuration of the search and
105 * indexing facilities in OpenCms.<p>
106 *
107 * @since 6.0.0
108 */
109public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
110
111    /**
112     *  Enumeration class for force unlock types.<p>
113     */
114    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
115
116        /** Force unlock type "always". */
117        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
118
119        /** Force unlock type "never". */
120        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
121
122        /** Force unlock type "only full". */
123        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
124
125        /** Serializable version id. */
126        private static final long serialVersionUID = 74746076708908673L;
127
128        /**
129         * Creates a new force unlock type with the given name.<p>
130         *
131         * @param mode the mode id to use
132         */
133        protected CmsSearchForceUnlockMode(String mode) {
134
135            super(mode);
136        }
137
138        /**
139         * Returns the lock type for the given type value.<p>
140         *
141         * @param type the type value to get the lock type for
142         *
143         * @return the lock type for the given type value
144         */
145        public static CmsSearchForceUnlockMode valueOf(String type) {
146
147            if (type.equals(ALWAYS.toString())) {
148                return ALWAYS;
149            } else if (type.equals(NEVER.toString())) {
150                return NEVER;
151            } else {
152                return ONLYFULL;
153            }
154        }
155    }
156
157    /**
158     * Handles offline index generation.<p>
159     */
160    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
161
162        /** Indicates if the event handlers for the offline search have been already registered. */
163        private boolean m_isEventRegistered;
164
165        /** The list of resources to index. */
166        private List<CmsPublishedResource> m_resourcesToIndex;
167
168        /**
169         * Initializes the offline index handler.<p>
170         */
171        protected CmsSearchOfflineHandler() {
172
173            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
174        }
175
176        /**
177         * Implements the event listener of this class.<p>
178         *
179         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
180         */
181        @SuppressWarnings("unchecked")
182        public void cmsEvent(CmsEvent event) {
183
184            switch (event.getType()) {
185                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
186                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
187                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
188                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
189                    Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
190                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
191                        // skip lock & unlock
192                        return;
193                    }
194                    // skip indexing if flag is set in event
195                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
196                    if (skip != null) {
197                        return;
198                    }
199
200                    // a resource has been modified - offline indexes require (re)indexing
201                    List<CmsResource> resources = Collections.singletonList(
202                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
203                    reIndexResources(resources);
204                    break;
205                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
206                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
207                        I_CmsEventListener.KEY_RESOURCES);
208                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
209                    for (CmsResource res : resourcesToDelete) {
210                        if (res.getState().isNew()) {
211                            // if the resource is new and a delete action was performed
212                            // --> set the state of the resource to deleted
213                            res.setState(CmsResourceState.STATE_DELETED);
214                        }
215                    }
216                    reIndexResources(resourcesToDelete);
217                    break;
218                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
219                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
220                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
221                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
222                    // a list of resources has been modified - offline indexes require (re)indexing
223                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
224                    break;
225                default:
226                    // no operation
227            }
228        }
229
230        /**
231         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
232         *
233         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
234         */
235        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
236
237            m_resourcesToIndex.addAll(resourcesToIndex);
238        }
239
240        /**
241         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
242         *
243         * @return the resources to index
244         */
245        protected List<CmsPublishedResource> getResourcesToIndex() {
246
247            List<CmsPublishedResource> result;
248            synchronized (this) {
249                result = m_resourcesToIndex;
250                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
251            }
252            try {
253                CmsObject cms = m_adminCms;
254                CmsProject offline = getOfflineIndexProject();
255                if (offline != null) {
256                    // switch to the offline project if available
257                    cms = OpenCms.initCmsObject(m_adminCms);
258                    cms.getRequestContext().setCurrentProject(offline);
259                }
260                findRelatedContainerPages(cms, result);
261            } catch (CmsException e) {
262                LOG.error(e.getLocalizedMessage(), e);
263            }
264            return result;
265        }
266
267        /**
268         * Initializes this offline search handler, registering the event handlers if required.<p>
269         */
270        protected void initialize() {
271
272            if (m_offlineIndexes.size() > 0) {
273                // there is at least one offline index configured
274                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
275                    // create the offline indexing thread
276                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
277                    // start the offline index thread
278                    m_offlineIndexThread.start();
279                }
280            } else {
281                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
282                    // no offline indexes but thread still running, stop the thread
283                    m_offlineIndexThread.shutDown();
284                    m_offlineIndexThread = null;
285                }
286            }
287            // do this only in case there are offline indexes configured
288            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
289                m_isEventRegistered = true;
290                // register this object as event listener
291                OpenCms.addCmsEventListener(
292                    this,
293                    new int[] {
294                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
295                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
296                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
297                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
298                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
299                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
300                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
301                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
302                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
303            }
304        }
305
306        /**
307         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
308         *
309         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
310         */
311        protected synchronized void reIndexResources(List<CmsResource> resources) {
312
313            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
314            for (CmsResource res : resources) {
315                CmsPublishedResource pubRes = new CmsPublishedResource(res);
316                resourcesToIndex.add(pubRes);
317            }
318            if (resourcesToIndex.size() > 0) {
319                // add the resources found to the offline index thread
320                addResourcesToIndex(resourcesToIndex);
321            }
322        }
323    }
324
325    /**
326     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
327     */
328    protected class CmsSearchOfflineIndexThread extends Thread {
329
330        /** The event handler that triggers this thread. */
331        CmsSearchOfflineHandler m_handler;
332
333        /** Indicates if this thread is still alive. */
334        boolean m_isAlive;
335
336        /** Indicates that an index update thread is currently running. */
337        private boolean m_isUpdating;
338
339        /** If true a manual update (after file upload) was triggered. */
340        private boolean m_updateTriggered;
341
342        /** The wait handle used for signalling when the worker thread has finished. */
343        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
344
345        /**
346         * Constructor.<p>
347         *
348         * @param handler the offline index event handler
349         */
350        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
351
352            super("OpenCms: Offline Search Indexer");
353            m_handler = handler;
354        }
355
356        /**
357         * Gets the wait handle used for signalling when the worker thread has finished.
358         *
359         * @return the wait handle
360         **/
361        public CmsWaitHandle getWaitHandle() {
362
363            return m_waitHandle;
364        }
365
366        /**
367         * @see java.lang.Thread#interrupt()
368         */
369        @Override
370        public void interrupt() {
371
372            super.interrupt();
373            m_updateTriggered = true;
374        }
375
376        /**
377         * @see java.lang.Thread#run()
378         */
379        @Override
380        public void run() {
381
382            // create a log report for the output
383            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
384            long offlineUpdateFrequency = getOfflineUpdateFrequency();
385            m_updateTriggered = false;
386            try {
387                while (m_isAlive) {
388                    if (!m_updateTriggered) {
389                        try {
390                            sleep(offlineUpdateFrequency);
391                        } catch (InterruptedException e) {
392                            // continue the thread after interruption
393                            if (!m_isAlive) {
394                                // the thread has been shut down while sleeping
395                                continue;
396                            }
397                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
398                                // offline update frequency change - clear interrupt status
399                                offlineUpdateFrequency = getOfflineUpdateFrequency();
400                            }
401                            LOG.info(e.getLocalizedMessage(), e);
402                        }
403                    }
404                    if (m_isAlive) {
405                        // set update trigger to false since we do the update now
406                        m_updateTriggered = false;
407                        // get list of resource to update
408                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
409                        if (resourcesToIndex.size() > 0) {
410                            // only start indexing if there is at least one resource
411                            startOfflineUpdateThread(report, resourcesToIndex);
412                        } else {
413                            getWaitHandle().release();
414                        }
415                        // this is just called to clear the interrupt status of the thread
416                        interrupted();
417                    }
418                }
419            } finally {
420                // make sure that live status is reset in case of Exceptions
421                m_isAlive = false;
422            }
423
424        }
425
426        /**
427         * @see java.lang.Thread#start()
428         */
429        @Override
430        public synchronized void start() {
431
432            m_isAlive = true;
433            super.start();
434        }
435
436        /**
437         * Obtains the list of resource to update in the offline index,
438         * then optimizes the list by removing duplicate entries.<p>
439         *
440         * @return the list of resource to update in the offline index
441         */
442        protected List<CmsPublishedResource> getResourcesToIndex() {
443
444            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
445            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
446
447            // Reverse to always keep the last list entries
448            Collections.reverse(resourcesToIndex);
449            for (CmsPublishedResource pubRes : resourcesToIndex) {
450                boolean addResource = true;
451                for (CmsPublishedResource resRes : result) {
452                    if (pubRes.equals(resRes)
453                        && (pubRes.getState() == resRes.getState())
454                        && (pubRes.getMovedState() == resRes.getMovedState())
455                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
456                        // resource already in the update list
457                        addResource = false;
458                        break;
459                    }
460                }
461                if (addResource) {
462                    result.add(pubRes);
463                }
464
465            }
466            Collections.reverse(result);
467            return changeStateOfMoveOriginsToDeleted(result);
468        }
469
470        /**
471         * Shuts down this offline index thread.<p>
472         */
473        protected void shutDown() {
474
475            m_isAlive = false;
476            interrupt();
477            if (m_isUpdating) {
478                long waitTime = getOfflineUpdateFrequency() / 2;
479                int waitSteps = 0;
480                do {
481                    try {
482                        // wait half the time of the offline index frequency for the thread to finish
483                        Thread.sleep(waitTime);
484                    } catch (InterruptedException e) {
485                        // continue
486                        LOG.info(e.getLocalizedMessage(), e);
487                    }
488                    waitSteps++;
489                    // wait 5 times then stop waiting
490                } while ((waitSteps < 5) && m_isUpdating);
491            }
492        }
493
494        /**
495         * Updates the offline search indexes for the given list of resources.<p>
496         *
497         * @param report the report to write the index information to
498         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
499         */
500        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
501
502            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
503            long startTime = System.currentTimeMillis();
504            long waitTime = getOfflineUpdateFrequency() / 2;
505            if (LOG.isDebugEnabled()) {
506                LOG.debug(
507                    Messages.get().getBundle().key(
508                        Messages.LOG_OI_UPDATE_START_1,
509                        Integer.valueOf(resourcesToIndex.size())));
510            }
511
512            m_isUpdating = true;
513            thread.start();
514
515            do {
516                try {
517                    // wait half the time of the offline index frequency for the thread to finish
518                    thread.join(waitTime);
519                } catch (InterruptedException e) {
520                    // continue
521                    LOG.info(e.getLocalizedMessage(), e);
522                }
523                if (thread.isAlive()) {
524                    LOG.warn(
525                        Messages.get().getBundle().key(
526                            Messages.LOG_OI_UPDATE_LONG_2,
527                            Integer.valueOf(resourcesToIndex.size()),
528                            Long.valueOf(System.currentTimeMillis() - startTime)));
529                }
530            } while (thread.isAlive());
531            m_isUpdating = false;
532
533            if (LOG.isDebugEnabled()) {
534                LOG.debug(
535                    Messages.get().getBundle().key(
536                        Messages.LOG_OI_UPDATE_FINISH_2,
537                        Integer.valueOf(resourcesToIndex.size()),
538                        Long.valueOf(System.currentTimeMillis() - startTime)));
539            }
540        }
541
542        /**
543         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
544         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
545         *
546         * @param resourcesToIndex the resources to index
547         *
548         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
549         */
550        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
551            List<CmsPublishedResource> resourcesToIndex) {
552
553            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
554            for (CmsPublishedResource resource : resourcesToIndex) {
555                if (resource.getState().isDeleted()) {
556                    // we don't want the last path to be from a deleted resource
557                    continue;
558                }
559                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
560            }
561            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
562            for (CmsPublishedResource resource : resourcesToIndex) {
563                if (resource.getState().isDeleted()) {
564                    result.add(resource);
565                    continue;
566                }
567                String lastValidPath = lastValidPaths.get(resource.getStructureId());
568                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
569                    result.add(resource);
570                } else {
571                    result.add(
572                        new CmsPublishedResource(
573                            resource.getStructureId(),
574                            resource.getResourceId(),
575                            resource.getPublishTag(),
576                            resource.getRootPath(),
577                            resource.getType(),
578                            resource.isFolder(),
579                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
580                            resource.getSiblingCount()));
581                }
582            }
583            return result;
584        }
585    }
586
587    /**
588     * An offline index worker Thread runs each time for every offline index update action.<p>
589     *
590     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
591     * problems if a single operation "hangs" the thread.<p>
592     */
593    protected class CmsSearchOfflineIndexWorkThread extends Thread {
594
595        /** The report to write the index information to. */
596        I_CmsReport m_report;
597
598        /** The list of {@link CmsPublishedResource} objects to index. */
599        List<CmsPublishedResource> m_resourcesToIndex;
600
601        /**
602         * Updates the offline search indexes for the given list of resources.<p>
603         *
604         * @param report the report to write the index information to
605         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
606         */
607        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
608
609            super("OpenCms: Offline Search Index Worker");
610            m_report = report;
611            m_resourcesToIndex = resourcesToIndex;
612        }
613
614        /**
615         * @see java.lang.Thread#run()
616         */
617        @Override
618        public void run() {
619
620            updateIndexOffline(m_report, m_resourcesToIndex);
621            if (m_offlineIndexThread != null) {
622                m_offlineIndexThread.getWaitHandle().release();
623            }
624        }
625    }
626
627    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
628    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);
629
630    /** The default value used for generating search result excerpts (1024 chars). */
631    public static final int DEFAULT_EXCERPT_LENGTH = 1024;
632
633    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
634    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;
635
636    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
637    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;
638
639    /** The default update frequency for offline indexes (15000 msec = 15 sec); the name is misspelled ("FREQNENCY") but the constant is public. */
640    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;
641
642    /** The default maximal wait time for re-indexing after editing a content (30000 msec = 30 sec). */
643    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;
644
645    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
646    public static final int DEFAULT_TIMEOUT = 60000;
647
648    /** Scheduler parameter: Update only a specified list of indexes. */
649    public static final String JOB_PARAM_INDEXLIST = "indexList";
650
651    /** Scheduler parameter: Write the output of the update to the logfile. */
652    public static final String JOB_PARAM_WRITELOG = "writeLog";
653
654    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
655    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";
656
657    /** The log object for this class. */
658    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);
659
660    /** The administrator OpenCms user context to access OpenCms VFS resources. */
661    protected CmsObject m_adminCms;
662
663    /** The list of indexes that are configured for offline index mode. */
664    protected List<CmsSearchIndex> m_offlineIndexes;
665
666    /** The thread used for offline indexing. */
667    protected CmsSearchOfflineIndexThread m_offlineIndexThread;
668
669    /** Configured analyzers for languages using &lt;analyzer&gt;. */
670    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;
671
672    /** Stores the offline update frequency while indexing is paused. */
673    private long m_configuredOfflineIndexingFrequency;
674
675    /** The Solr core container. */
676    private CoreContainer m_coreContainer;
677
678    /** A list of document type configurations. */
679    private List<CmsSearchDocumentType> m_documentTypeConfigs;
680
681    /** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */
682    private Map<String, I_CmsDocumentFactory> m_documentTypes;
683
684    /** The max age for extraction results to remain in the cache. */
685    private float m_extractionCacheMaxAge;
686
687    /** The cache for the extraction results. */
688    private CmsExtractionResultCache m_extractionResultCache;
689
690    /** Contains the available field configurations. */
691    private Map<String, CmsSearchFieldConfiguration> m_fieldConfigurations;
692
693    /** The force unlock type. */
694    private CmsSearchForceUnlockMode m_forceUnlockMode;
695
696    /** The class used to highlight the search terms in the excerpt of a search result. */
697    private I_CmsTermHighlighter m_highlighter;
698
699    /** A list of search indexes. */
700    private List<CmsSearchIndex> m_indexes;
701
702    /** Seconds to wait for an index lock. */
703    private int m_indexLockMaxWaitSeconds = 10;
704
705    /** Configured index sources. */
706    private Map<String, CmsSearchIndexSource> m_indexSources;
707
708    /** The max. char. length of the excerpt in the search result. */
709    private int m_maxExcerptLength;
710
711    /** The maximum number of modifications before a commit in the search index is triggered. */
712    private int m_maxModificationsBeforeCommit;
713
714    /** The offline index search handler. */
715    private CmsSearchOfflineHandler m_offlineHandler;
716
717    /** The update frequency of the offline indexer in milliseconds. */
718    private long m_offlineUpdateFrequency;
719
720    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
721    private long m_maxIndexWaitTime;
722
723    /** Path to index files below WEB-INF/. */
724    private String m_path;
725
726    /** The Solr configuration. */
727    private CmsSolrConfiguration m_solrConfig;
728
729    /** Timeout for abandoning indexing thread. */
730    private long m_timeout;
731
732    /**
733     * Default constructor when called as cron job.<p>
734     */
735    public CmsSearchManager() {
736
737        m_documentTypes = new HashMap<String, I_CmsDocumentFactory>();
738        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
739        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
740        m_indexes = new ArrayList<CmsSearchIndex>();
741        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
742        m_offlineHandler = new CmsSearchOfflineHandler();
743        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
744        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
745        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
746        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
747        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
748
749        m_fieldConfigurations = new HashMap<String, CmsSearchFieldConfiguration>();
750        // make sure we have a "standard" field configuration
751        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
752
753        if (CmsLog.INIT.isInfoEnabled()) {
754            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
755        }
756    }
757
758    /**
759     * Returns an analyzer for the given class name.<p>
760     *
761     * @param className the class name of the analyzer
762     *
763     * @return the appropriate lucene analyzer
764     *
765     * @throws Exception if something goes wrong
766     */
767    public static Analyzer getAnalyzer(String className) throws Exception {
768
769        Analyzer analyzer = null;
770        Class<?> analyzerClass;
771        try {
772            analyzerClass = Class.forName(className);
773        } catch (ClassNotFoundException e) {
774            // allow Lucene standard classes to be written in a short form
775            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
776        }
777
778        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
779        if (StandardAnalyzer.class.equals(analyzerClass)) {
780            // the Lucene standard analyzer is used - but without any stopwords.
781            // TODO: Is it a good idea to remove the default english stopwords used by default?
782            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
783        } else {
784            analyzer = (Analyzer)analyzerClass.newInstance();
785        }
786        return analyzer;
787    }
788
789    /**
790     * Returns the Solr index configured with the parameters name.
791     * The parameters must contain a key/value pair with an existing
792     * Solr index, otherwise <code>null</code> is returned.<p>
793     *
794     * @param cms the current context
795     * @param params the parameter map
796     *
797     * @return the best matching Solr index
798     */
799    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
800
801        String indexName = null;
802        CmsSolrIndex index = null;
803        // try to get the index name from the parameters: 'core' or 'index'
804        if (params != null) {
805            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
806            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
807            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
808            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
809            : null);
810        }
811        if (indexName == null) {
812            // if no parameter is specified try to use the default online/offline indexes by context
813            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
814            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
815            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
816        }
817        // try to get the index
818        index = indexName != null ? OpenCms.getSearchManager().getIndexSolr(indexName) : null;
819        if (index == null) {
820            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
821            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
822            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
823                index = solrs.get(0);
824            }
825        }
826        return index;
827    }
828
829    /**
830     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
831     *
832     * @param indexName the name of the index to check
833     *
834     * @return <code>true</code> if the index for the given name is a Lucene index
835     */
836    public static boolean isLuceneIndex(String indexName) {
837
838        CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
839        if (i instanceof CmsSolrIndex) {
840            return false;
841        }
842        return true;
843    }
844
845    /**
846     * Adds an analyzer.<p>
847     *
848     * @param analyzer an analyzer
849     */
850    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
851
852        m_analyzers.put(analyzer.getLocale(), analyzer);
853
854        if (CmsLog.INIT.isInfoEnabled()) {
855            CmsLog.INIT.info(
856                Messages.get().getBundle().key(
857                    Messages.INIT_ADD_ANALYZER_2,
858                    analyzer.getLocale(),
859                    analyzer.getClassName()));
860        }
861    }
862
863    /**
864     * Adds a document type.<p>
865     *
866     * @param documentType a document type
867     */
868    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
869
870        m_documentTypeConfigs.add(documentType);
871
872        if (CmsLog.INIT.isInfoEnabled()) {
873            CmsLog.INIT.info(
874                Messages.get().getBundle().key(
875                    Messages.INIT_SEARCH_DOC_TYPES_2,
876                    documentType.getName(),
877                    documentType.getClassName()));
878        }
879    }
880
881    /**
882     * Adds a search field configuration to the search manager.<p>
883     *
884     * @param fieldConfiguration the search field configuration to add
885     */
886    public void addFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration) {
887
888        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
889        if (fieldConfiguration.getFields().isEmpty()) {
890            LOG.debug(
891                Messages.get().getBundle().key(
892                    Messages.LOG_FIELD_CONFIGURATION_IS_EMPTY_1,
893                    fieldConfiguration.getName()));
894        }
895    }
896
897    /**
898     * Adds a search index to the configuration.<p>
899     *
900     * @param searchIndex the search index to add
901     */
902    public void addSearchIndex(CmsSearchIndex searchIndex) {
903
904        if ((searchIndex.getSources() == null) || (searchIndex.getPath() == null)) {
905            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
906                try {
907                    searchIndex.initialize();
908                } catch (CmsException e) {
909                    // should never happen
910                    LOG.error(e.getMessage(), e);
911                }
912            }
913        }
914
915        // name: not null or emtpy and unique
916        String name = searchIndex.getName();
917        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
918            throw new CmsIllegalArgumentException(
919                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
920        }
921        if (m_indexSources.keySet().contains(name)) {
922            throw new CmsIllegalArgumentException(
923                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
924        }
925
926        m_indexes.add(searchIndex);
927        if (m_adminCms != null) {
928            initOfflineIndexes();
929        }
930
931        if (CmsLog.INIT.isInfoEnabled()) {
932            CmsLog.INIT.info(
933                Messages.get().getBundle().key(
934                    Messages.INIT_ADD_SEARCH_INDEX_2,
935                    searchIndex.getName(),
936                    searchIndex.getProject()));
937        }
938    }
939
940    /**
941     * Adds a search index source configuration.<p>
942     *
943     * @param searchIndexSource a search index source configuration
944     */
945    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
946
947        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
948
949        if (CmsLog.INIT.isInfoEnabled()) {
950            CmsLog.INIT.info(
951                Messages.get().getBundle().key(
952                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
953                    searchIndexSource.getName(),
954                    searchIndexSource.getIndexerClassName()));
955        }
956    }
957
958    /**
959     * Implements the event listener of this class.<p>
960     *
961     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
962     */
963    public void cmsEvent(CmsEvent event) {
964
965        switch (event.getType()) {
966            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
967                List<String> indexNames = null;
968                if ((event.getData() != null)
969                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
970                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
971                    indexNames = CmsStringUtil.splitAsList(
972                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
973                        ",",
974                        true);
975                }
976                try {
977                    if (LOG.isDebugEnabled()) {
978                        LOG.debug(
979                            Messages.get().getBundle().key(
980                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
981                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
982                            new Exception());
983                    }
984                    if (indexNames == null) {
985                        rebuildAllIndexes(getEventReport(event));
986                    } else {
987                        rebuildIndexes(indexNames, getEventReport(event));
988                    }
989                } catch (CmsException e) {
990                    if (LOG.isErrorEnabled()) {
991                        LOG.error(
992                            Messages.get().getBundle().key(
993                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
994                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
995                            e);
996                    }
997                }
998                break;
999            case I_CmsEventListener.EVENT_CLEAR_CACHES:
1000                if (LOG.isDebugEnabled()) {
1001                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
1002                }
1003                break;
1004            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
1005                // event data contains a list of the published resources
1006                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1007                if (LOG.isDebugEnabled()) {
1008                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1009                }
1010                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1011                if (LOG.isDebugEnabled()) {
1012                    LOG.debug(
1013                        Messages.get().getBundle().key(
1014                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1015                            publishHistoryId));
1016                }
1017                break;
1018            default:
1019                // no operation
1020        }
1021    }
1022
1023    /**
1024     * Returns all Solr index.<p>
1025     *
1026     * @return all Solr indexes
1027     */
1028    public List<CmsSolrIndex> getAllSolrIndexes() {
1029
1030        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1031        for (String indexName : getIndexNames()) {
1032            CmsSolrIndex index = getIndexSolr(indexName);
1033            if (index != null) {
1034                result.add(index);
1035            }
1036        }
1037        return result;
1038    }
1039
1040    /**
1041     * Returns an analyzer for the given language.<p>
1042     *
1043     * The analyzer is selected according to the analyzer configuration.<p>
1044     *
1045     * @param locale the locale to get the analyzer for
1046     * @return the appropriate lucene analyzer
1047     *
1048     * @throws CmsSearchException if something goes wrong
1049     */
1050    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1051
1052        Analyzer analyzer = null;
1053        String className = null;
1054
1055        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1056        if (analyzerConf == null) {
1057            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1058        }
1059
1060        try {
1061            analyzer = getAnalyzer(analyzerConf.getClassName());
1062        } catch (Exception e) {
1063            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1064        }
1065
1066        return analyzer;
1067    }
1068
1069    /**
1070     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1071     *
1072     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1073     *
1074     * @return an unmodifiable view of the Analyzers Map
1075     */
1076    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1077
1078        return Collections.unmodifiableMap(m_analyzers);
1079    }
1080
1081    /**
1082     * Returns the search analyzer for the given locale.<p>
1083     *
1084     * @param locale the locale to get the analyzer for
1085     *
1086     * @return the search analyzer for the given locale
1087     */
1088    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1089
1090        return m_analyzers.get(locale);
1091    }
1092
1093    /**
1094     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1095     *
1096     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1097     */
1098    public String getDirectory() {
1099
1100        return m_path;
1101    }
1102
1103    /**
1104     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1105     *
1106     * @return the Solr home directory
1107     */
1108    public String getDirectorySolr() {
1109
1110        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1111    }
1112
1113    /**
1114     * Returns a lucene document factory for given resource.<p>
1115     *
1116     * The type of the document factory is selected by the type of the resource
1117     * and the MIME type of the resource content, according to the configuration in <code>opencms-search.xml</code>.<p>
1118     *
1119     * @param resource a cms resource
1120     * @return a lucene document factory or null
1121     */
1122    public I_CmsDocumentFactory getDocumentFactory(CmsResource resource) {
1123
1124        // first get the MIME type of the resource
1125        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1126        String resourceType = null;
1127        try {
1128            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1129        } catch (CmsLoaderException e) {
1130            // ignore, unknown resource type, resource can not be indexed
1131            LOG.info(e.getLocalizedMessage(), e);
1132        }
1133        return getDocumentFactory(resourceType, mimeType);
1134    }
1135
1136    /**
1137     * Returns a lucene document factory for given resource type and MIME type.<p>
1138     *
1139     * The type of the document factory is selected  according to the configuration
1140     * in <code>opencms-search.xml</code>.<p>
1141     *
1142     * @param resourceType the resource type name
1143     * @param mimeType the MIME type
1144     *
1145     * @return a lucene document factory or null in case no matching factory was found
1146     */
1147    public I_CmsDocumentFactory getDocumentFactory(String resourceType, String mimeType) {
1148
1149        I_CmsDocumentFactory result = null;
1150        if (resourceType != null) {
1151            // create the factory lookup key for the document
1152            String documentTypeKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
1153            // check if a setting is available for this specific MIME type
1154            result = m_documentTypes.get(documentTypeKey);
1155            if (result == null) {
1156                // no setting is available, try to use a generic setting without MIME type
1157                result = m_documentTypes.get(A_CmsVfsDocument.getDocumentKey(resourceType, null));
1158                // please note: the result may still be null
1159            }
1160        }
1161        return result;
1162    }
1163
1164    /**
1165     * Returns a document type config.<p>
1166     *
1167     * @param name the name of the document type config
1168     * @return the document type config.
1169     */
1170    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1171
1172        // this is really used only for the search manager GUI,
1173        // so performance is not an issue and no lookup map is generated
1174        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1175            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1176            if (type.getName().equals(name)) {
1177                return type;
1178            }
1179        }
1180        return null;
1181    }
1182
1183    /**
1184     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1185     *
1186     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1187     */
1188    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1189
1190        return Collections.unmodifiableList(m_documentTypeConfigs);
1191    }
1192
1193    /**
1194     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
1195     *
1196     * @return the maximum age a text extraction result is kept in the cache (in hours)
1197     */
1198    public float getExtractionCacheMaxAge() {
1199
1200        return m_extractionCacheMaxAge;
1201    }
1202
1203    /**
1204     * Returns the search field configuration with the given name.<p>
1205     *
1206     * In case no configuration is available with the given name, <code>null</code> is returned.<p>
1207     *
1208     * @param name the name to get the search field configuration for
1209     *
1210     * @return the search field configuration with the given name
1211     */
1212    public CmsSearchFieldConfiguration getFieldConfiguration(String name) {
1213
1214        return m_fieldConfigurations.get(name);
1215    }
1216
1217    /**
1218     * Returns the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries.<p>
1219     *
1220     * @return the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries
1221     */
1222    public List<CmsSearchFieldConfiguration> getFieldConfigurations() {
1223
1224        List<CmsSearchFieldConfiguration> result = new ArrayList<CmsSearchFieldConfiguration>(
1225            m_fieldConfigurations.values());
1226        Collections.sort(result);
1227        return Collections.unmodifiableList(result);
1228    }
1229
1230    /**
1231     * Returns the Lucene search field configurations only.<p>
1232     *
1233     * @return the Lucene search field configurations
1234     */
1235    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1236
1237        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1238        for (CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1239            if (conf instanceof CmsLuceneFieldConfiguration) {
1240                result.add((CmsLuceneFieldConfiguration)conf);
1241            }
1242        }
1243        Collections.sort(result);
1244        return Collections.unmodifiableList(result);
1245    }
1246
1247    /**
1248     * Returns the Solr search field configurations only.<p>
1249     *
1250     * @return the Solr search field configurations
1251     */
1252    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1253
1254        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1255        for (CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1256            if (conf instanceof CmsSolrFieldConfiguration) {
1257                result.add((CmsSolrFieldConfiguration)conf);
1258            }
1259        }
1260        Collections.sort(result);
1261        return Collections.unmodifiableList(result);
1262    }
1263
1264    /**
1265     * Returns the force unlock mode during indexing.<p>
1266     *
1267     * @return the force unlock mode during indexing
1268     */
1269    public CmsSearchForceUnlockMode getForceunlock() {
1270
1271        return m_forceUnlockMode;
1272    }
1273
1274    /**
1275     * Returns the highlighter.<p>
1276     *
1277     * @return the highlighter
1278     */
1279    public I_CmsTermHighlighter getHighlighter() {
1280
1281        return m_highlighter;
1282    }
1283
1284    /**
1285     * Returns the Lucene search index configured with the given name.<p>
1286     * The index must exist, otherwise <code>null</code> is returned.
1287     *
1288     * @param indexName then name of the requested search index
1289     *
1290     * @return the Lucene search index configured with the given name
1291     */
1292    public CmsSearchIndex getIndex(String indexName) {
1293
1294        for (CmsSearchIndex index : m_indexes) {
1295            if (indexName.equalsIgnoreCase(index.getName())) {
1296                return index;
1297            }
1298        }
1299        return null;
1300    }
1301
1302    /**
1303     * Returns the seconds to wait for an index lock during an update operation.<p>
1304     *
1305     * @return the seconds to wait for an index lock during an update operation
1306     */
1307    public int getIndexLockMaxWaitSeconds() {
1308
1309        return m_indexLockMaxWaitSeconds;
1310    }
1311
1312    /**
1313     * Returns the names of all configured indexes.<p>
1314     *
1315     * @return list of names
1316     */
1317    public List<String> getIndexNames() {
1318
1319        List<String> indexNames = new ArrayList<String>();
1320        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1321            indexNames.add((m_indexes.get(i)).getName());
1322        }
1323
1324        return indexNames;
1325    }
1326
1327    /**
1328     * Returns the Solr index configured with the given name.<p>
1329     * The index must exist, otherwise <code>null</code> is returned.
1330     *
1331     * @param indexName then name of the requested Solr index
1332     * @return the Solr index configured with the given name
1333     */
1334    public CmsSolrIndex getIndexSolr(String indexName) {
1335
1336        CmsSearchIndex index = getIndex(indexName);
1337        if (index instanceof CmsSolrIndex) {
1338            return (CmsSolrIndex)index;
1339        }
1340        return null;
1341    }
1342
1343    /**
1344     * Returns a search index source for a specified source name.<p>
1345     *
1346     * @param sourceName the name of the index source
1347     * @return a search index source
1348     */
1349    public CmsSearchIndexSource getIndexSource(String sourceName) {
1350
1351        return m_indexSources.get(sourceName);
1352    }
1353
1354    /**
1355     * Returns the max. excerpt length.<p>
1356     *
1357     * @return the max excerpt length
1358     */
1359    public int getMaxExcerptLength() {
1360
1361        return m_maxExcerptLength;
1362    }
1363
1364    /**
1365     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
1366     *
1367     * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds)
1368     */
1369    public long getMaxIndexWaitTime() {
1370
1371        return m_maxIndexWaitTime;
1372    }
1373
1374    /**
1375     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
1376     *
1377     * @return the maximum number of modifications before a commit in the search index is triggered
1378     */
1379    public int getMaxModificationsBeforeCommit() {
1380
1381        return m_maxModificationsBeforeCommit;
1382    }
1383
1384    /**
1385     * Returns the update frequency of the offline indexer in milliseconds.<p>
1386     *
1387     * @return the update frequency of the offline indexer in milliseconds
1388     */
1389    public long getOfflineUpdateFrequency() {
1390
1391        return m_offlineUpdateFrequency;
1392    }
1393
1394    /**
1395     * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p>
1396     *
1397     * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances
1398     */
1399    public List<CmsSearchIndex> getSearchIndexes() {
1400
1401        return Collections.unmodifiableList(m_indexes);
1402    }
1403
1404    /**
1405     * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p>
1406     *
1407     * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances
1408     */
1409    public List<CmsSearchIndex> getSearchIndexesAll() {
1410
1411        return Collections.unmodifiableList(m_indexes);
1412    }
1413
1414    /**
1415     * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p>
1416     *
1417     * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances
1418     */
1419    public List<CmsSolrIndex> getSearchIndexesSolr() {
1420
1421        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1422        for (CmsSearchIndex index : m_indexes) {
1423            if (index instanceof CmsSolrIndex) {
1424                indexes.add((CmsSolrIndex)index);
1425            }
1426        }
1427        return Collections.unmodifiableList(indexes);
1428    }
1429
1430    /**
1431     * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
1432     *
1433     * @return an unmodifiable view (read-only) of the SearchIndexSources Map
1434     */
1435    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {
1436
1437        return Collections.unmodifiableMap(m_indexSources);
1438    }
1439
1440    /**
1441     * Return singleton instance of the OpenCms spellchecker.<p>
1442     *
1443     * @param cms the cms object.
1444     *
1445     * @return instance of CmsSolrSpellchecker.
1446     */
1447    public CmsSolrSpellchecker getSolrDictionary(CmsObject cms) {
1448
1449        // get the core container that contains one core for each configured index
1450        if (m_coreContainer == null) {
1451            m_coreContainer = createCoreContainer();
1452        }
1453        SolrCore spellcheckCore = m_coreContainer.getCore(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE);
1454        if (spellcheckCore == null) {
1455            LOG.error(
1456                Messages.get().getBundle().key(
1457                    Messages.ERR_SPELLCHECK_CORE_NOT_AVAILABLE_1,
1458                    CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE));
1459            return null;
1460        } else {
1461            return CmsSolrSpellchecker.getInstance(m_coreContainer, spellcheckCore);
1462        }
1463    }
1464
1465    /**
1466     * Returns the Solr configuration.<p>
1467     *
1468     * @return the Solr configuration
1469     */
1470    public CmsSolrConfiguration getSolrServerConfiguration() {
1471
1472        return m_solrConfig;
1473    }
1474
1475    /**
1476     * Returns the timeout to abandon threads indexing a resource.<p>
1477     *
1478     * @return the timeout to abandon threads indexing a resource
1479     */
1480    public long getTimeout() {
1481
1482        return m_timeout;
1483    }
1484
1485    /**
1486     * Initializes the search manager.<p>
1487     *
1488     * @param cms the cms object
1489     *
1490     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1491     */
1492    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1493
1494        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1495        try {
1496            // store the Admin cms to index Cms resources
1497            m_adminCms = OpenCms.initCmsObject(cms);
1498        } catch (CmsException e) {
1499            // this should never happen
1500            LOG.error(e.getLocalizedMessage(), e);
1501        }
1502        // make sure the site root is the root site
1503        m_adminCms.getRequestContext().setSiteRoot("/");
1504
1505        // create the extraction result cache
1506        m_extractionResultCache = new CmsExtractionResultCache(
1507            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1508            "/extractCache");
1509        initializeIndexes();
1510        initOfflineIndexes();
1511
1512        // register this object as event listener
1513        OpenCms.addCmsEventListener(
1514            this,
1515            new int[] {
1516                I_CmsEventListener.EVENT_CLEAR_CACHES,
1517                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1518                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES});
1519    }
1520
1521    /**
1522     * Initializes all configured document types and search indexes.<p>
1523     *
1524     * This methods needs to be called if after a change in the index configuration has been made.
1525     */
1526    public void initializeIndexes() {
1527
1528        initAvailableDocumentTypes();
1529        initSearchIndexes();
1530    }
1531
1532    /**
1533     * Initialize the offline index handler, require after an offline index has been added.<p>
1534     */
1535    public void initOfflineIndexes() {
1536
1537        // check which indexes are configured as offline indexes
1538        List<CmsSearchIndex> offlineIndexes = new ArrayList<CmsSearchIndex>();
1539        Iterator<CmsSearchIndex> i = m_indexes.iterator();
1540        while (i.hasNext()) {
1541            CmsSearchIndex index = i.next();
1542            if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1543                // this is an offline index
1544                offlineIndexes.add(index);
1545            }
1546        }
1547        m_offlineIndexes = offlineIndexes;
1548        m_offlineHandler.initialize();
1549
1550    }
1551
1552    /**
1553     * Returns if the offline indexing is paused.<p>
1554     *
1555     * @return <code>true</code> if the offline indexing is paused
1556     */
1557    public boolean isOfflineIndexingPaused() {
1558
1559        return m_offlineUpdateFrequency == Long.MAX_VALUE;
1560    }
1561
1562    /**
1563     * Updates the indexes from as a scheduled job.<p>
1564     *
1565     * @param cms the OpenCms user context to use when reading resources from the VFS
1566     * @param parameters the parameters for the scheduled job
1567     *
1568     * @throws Exception if something goes wrong
1569     *
1570     * @return the String to write in the scheduler log
1571     *
1572     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1573     */
1574    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1575
1576        CmsSearchManager manager = OpenCms.getSearchManager();
1577
1578        I_CmsReport report = null;
1579        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1580
1581        if (writeLog) {
1582            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1583        }
1584
1585        List<String> updateList = null;
1586        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1587        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1588            // index list has been provided as job parameter
1589            updateList = new ArrayList<String>();
1590            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1591            for (int i = 0; i < indexNames.length; i++) {
1592                // check if the index actually exists
1593                if (manager.getIndex(indexNames[i]) != null) {
1594                    updateList.add(indexNames[i]);
1595                } else {
1596                    if (LOG.isWarnEnabled()) {
1597                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1598                    }
1599                }
1600            }
1601        }
1602
1603        long startTime = System.currentTimeMillis();
1604
1605        if (updateList == null) {
1606            // all indexes need to be updated
1607            manager.rebuildAllIndexes(report);
1608        } else {
1609            // rebuild only the selected indexes
1610            manager.rebuildIndexes(updateList, report);
1611        }
1612
1613        long runTime = System.currentTimeMillis() - startTime;
1614
1615        String finishMessage = Messages.get().getBundle().key(
1616            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1617            CmsStringUtil.formatRuntime(runTime));
1618
1619        if (LOG.isInfoEnabled()) {
1620            LOG.info(finishMessage);
1621        }
1622        return finishMessage;
1623    }
1624
1625    /**
1626     * Pauses the offline indexing.<p>
1627     * May take some time, because the indexes are updated first.<p>
1628     */
1629    public void pauseOfflineIndexing() {
1630
1631        if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
1632            m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1633            m_offlineUpdateFrequency = Long.MAX_VALUE;
1634            updateOfflineIndexes(0);
1635        }
1636    }
1637
1638    /**
1639     * Rebuilds (if required creates) all configured indexes.<p>
1640     *
1641     * @param report the report object to write messages (or <code>null</code>)
1642     *
1643     * @throws CmsException if something goes wrong
1644     */
1645    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1646
1647        try {
1648            SEARCH_MANAGER_LOCK.lock();
1649
1650            CmsMessageContainer container = null;
1651            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1652                // iterate all configured search indexes
1653                CmsSearchIndex searchIndex = m_indexes.get(i);
1654                try {
1655                    // update the index
1656                    updateIndex(searchIndex, report, null);
1657                } catch (CmsException e) {
1658                    container = new CmsMessageContainer(
1659                        Messages.get(),
1660                        Messages.ERR_INDEX_REBUILD_ALL_1,
1661                        new Object[] {searchIndex.getName()});
1662                    LOG.error(
1663                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1664                        e);
1665                }
1666            }
1667            // clean up the extraction result cache
1668            cleanExtractionCache();
1669            if (container != null) {
1670                // throw stored exception
1671                throw new CmsSearchException(container);
1672            }
1673        } finally {
1674            SEARCH_MANAGER_LOCK.unlock();
1675        }
1676    }
1677
1678    /**
1679     * Rebuilds (if required creates) the index with the given name.<p>
1680     *
1681     * @param indexName the name of the index to rebuild
1682     * @param report the report object to write messages (or <code>null</code>)
1683     *
1684     * @throws CmsException if something goes wrong
1685     */
1686    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1687
1688        try {
1689            SEARCH_MANAGER_LOCK.lock();
1690            // get the search index by name
1691            CmsSearchIndex index = getIndex(indexName);
1692            // update the index
1693            updateIndex(index, report, null);
1694            // clean up the extraction result cache
1695            cleanExtractionCache();
1696        } finally {
1697            SEARCH_MANAGER_LOCK.unlock();
1698        }
1699    }
1700
1701    /**
1702     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1703     *
1704     * @param indexNames the names (String) of the index to rebuild
1705     * @param report the report object to write messages (or <code>null</code>)
1706     *
1707     * @throws CmsException if something goes wrong
1708     */
1709    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1710
1711        try {
1712            SEARCH_MANAGER_LOCK.lock();
1713            Iterator<String> i = indexNames.iterator();
1714            while (i.hasNext()) {
1715                String indexName = i.next();
1716                // get the search index by name
1717                CmsSearchIndex index = getIndex(indexName);
1718                if (index != null) {
1719                    // update the index
1720                    updateIndex(index, report, null);
1721                } else {
1722                    if (LOG.isWarnEnabled()) {
1723                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1724                    }
1725                }
1726            }
1727            // clean up the extraction result cache
1728            cleanExtractionCache();
1729        } finally {
1730            SEARCH_MANAGER_LOCK.unlock();
1731        }
1732    }
1733
1734    /**
1735     * Registers a new Solr core for the given index.<p>
1736     *
1737     * @param index the index to register a new Solr core for
1738     *
1739     * @throws CmsConfigurationException if no Solr server is configured
1740     */
1741    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
1742
1743        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
1744            // No solr server configured
1745            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
1746        }
1747
1748        if (m_solrConfig.getServerUrl() != null) {
1749            // HTTP Server configured
1750            // TODO Implement multi core support for HTTP server
1751            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
1752            index.setSolrServer(new HttpSolrClient(m_solrConfig.getServerUrl()));
1753        }
1754
1755        // get the core container that contains one core for each configured index
1756        if (m_coreContainer == null) {
1757            m_coreContainer = createCoreContainer();
1758        }
1759
1760        // create a new core if no core exists for the given index
1761        if (!m_coreContainer.getCoreNames().contains(index.getCoreName())) {
1762            // Being sure the core container is not 'null',
1763            // we can create a core for this index if not already existent
1764            File dataDir = new File(index.getPath());
1765            if (!dataDir.exists()) {
1766                dataDir.mkdirs();
1767                if (CmsLog.INIT.isInfoEnabled()) {
1768                    CmsLog.INIT.info(
1769                        Messages.get().getBundle().key(
1770                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
1771                            index.getName(),
1772                            index.getPath()));
1773                }
1774            }
1775            File instanceDir = new File(
1776                m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
1777            if (!instanceDir.exists()) {
1778                instanceDir.mkdirs();
1779                if (CmsLog.INIT.isInfoEnabled()) {
1780                    CmsLog.INIT.info(
1781                        Messages.get().getBundle().key(
1782                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
1783                            index.getName(),
1784                            index.getPath()));
1785                }
1786            }
1787
1788            // create the core
1789            // TODO: suboptimal - forces always the same schema
1790            SolrCore core = null;
1791            try {
1792                // creation includes registration.
1793                // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
1794                Map<String, String> properties = new HashMap<String, String>(3);
1795                properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
1796                properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
1797                core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties);
1798            } catch (NullPointerException e) {
1799                if (core != null) {
1800                    core.close();
1801                }
1802                throw new CmsConfigurationException(
1803                    Messages.get().container(
1804                        Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
1805                        index.getName() + " (" + index.getCoreName() + ")",
1806                        index.getPath(),
1807                        m_solrConfig.getSolrConfigFile().getAbsolutePath()),
1808                    e);
1809            }
1810        }
1811        if (index.isNoSolrServerSet()) {
1812            index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
1813        }
1814        if (CmsLog.INIT.isInfoEnabled()) {
1815            CmsLog.INIT.info(
1816                Messages.get().getBundle().key(
1817                    Messages.INIT_SOLR_SERVER_CREATED_1,
1818                    index.getName() + " (" + index.getCoreName() + ")"));
1819        }
1820    }
1821
1822    /**
1823     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
1824     *
1825     * @param fieldConfiguration the field configuration to remove from the configuration
1826     *
1827     * @return true if remove was successful, false if preconditions for removal are ok but the given
1828     *         field configuration was unknown to the manager.
1829     *
1830     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
1831     *         <code>{@link CmsSearchIndex}</code>.
1832     *
1833     */
1834    public boolean removeSearchFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration)
1835    throws CmsIllegalStateException {
1836
1837        // never remove the standard field configuration
1838        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
1839            throw new CmsIllegalStateException(
1840                Messages.get().container(
1841                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
1842                    fieldConfiguration.getName()));
1843        }
1844        // validation if removal will be granted
1845        Iterator<CmsSearchIndex> itIndexes = m_indexes.iterator();
1846        CmsSearchIndex idx;
1847        // the list for collecting indexes that use the given field configuration
1848        List<CmsSearchIndex> referrers = new ArrayList<CmsSearchIndex>();
1849        CmsSearchFieldConfiguration refFieldConfig;
1850        while (itIndexes.hasNext()) {
1851            idx = itIndexes.next();
1852            refFieldConfig = idx.getFieldConfiguration();
1853            if (refFieldConfig.equals(fieldConfiguration)) {
1854                referrers.add(idx);
1855            }
1856        }
1857        if (referrers.size() > 0) {
1858            throw new CmsIllegalStateException(
1859                Messages.get().container(
1860                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
1861                    fieldConfiguration.getName(),
1862                    referrers.toString()));
1863        }
1864
1865        // remove operation (no exception)
1866        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
1867
1868    }
1869
1870    /**
1871     * Removes a search field from the field configuration.<p>
1872     *
1873     * @param fieldConfiguration the field configuration
1874     * @param field field to remove from the field configuration
1875     *
1876     * @return true if remove was successful, false if preconditions for removal are ok but the given
1877     *         field was unknown.
1878     *
1879     * @throws CmsIllegalStateException if the given field is the last field inside the given field configuration.
1880     */
1881    public boolean removeSearchFieldConfigurationField(
1882        CmsSearchFieldConfiguration fieldConfiguration,
1883        CmsSearchField field)
1884    throws CmsIllegalStateException {
1885
1886        if (fieldConfiguration.getFields().size() < 2) {
1887            throw new CmsIllegalStateException(
1888                Messages.get().container(
1889                    Messages.ERR_CONFIGURATION_FIELD_DELETE_2,
1890                    field.getName(),
1891                    fieldConfiguration.getName()));
1892        } else {
1893
1894            if (LOG.isInfoEnabled()) {
1895                LOG.info(
1896                    Messages.get().getBundle().key(
1897                        Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
1898                        field.getName(),
1899                        fieldConfiguration.getName()));
1900            }
1901
1902            return fieldConfiguration.getFields().remove(field);
1903        }
1904    }
1905
1906    /**
1907     * Removes a search field mapping from the given field.<p>
1908     *
1909     * @param field the field
1910     * @param mapping mapping to remove from the field
1911     *
1912     * @return true if remove was successful, false if preconditions for removal are ok but the given
1913     *         mapping was unknown.
1914     *
1915     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
1916     */
1917    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
1918    throws CmsIllegalStateException {
1919
1920        if (field.getMappings().size() < 2) {
1921            throw new CmsIllegalStateException(
1922                Messages.get().container(
1923                    Messages.ERR_FIELD_MAPPING_DELETE_2,
1924                    mapping.getType().toString(),
1925                    field.getName()));
1926        } else {
1927
1928            if (LOG.isInfoEnabled()) {
1929                LOG.info(
1930                    Messages.get().getBundle().key(
1931                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
1932                        mapping.toString(),
1933                        field.getName()));
1934            }
1935            return field.getMappings().remove(mapping);
1936        }
1937    }
1938
1939    /**
1940     * Removes a search index from the configuration.<p>
1941     *
1942     * @param searchIndex the search index to remove
1943     */
1944    public void removeSearchIndex(CmsSearchIndex searchIndex) {
1945
1946        // shut down index to remove potential config files of Solr indexes
1947        searchIndex.shutDown();
1948        if (searchIndex instanceof CmsSolrIndex) {
1949            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
1950            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
1951        }
1952        m_indexes.remove(searchIndex);
1953        initOfflineIndexes();
1954
1955        if (LOG.isInfoEnabled()) {
1956            LOG.info(
1957                Messages.get().getBundle().key(
1958                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
1959                    searchIndex.getName(),
1960                    searchIndex.getProject()));
1961        }
1962    }
1963
1964    /**
1965     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
1966     *
1967     * @param indexNames the names of the index to remove
1968     */
1969    public void removeSearchIndexes(List<String> indexNames) {
1970
1971        Iterator<String> i = indexNames.iterator();
1972        while (i.hasNext()) {
1973            String indexName = i.next();
1974            // get the search index by name
1975            CmsSearchIndex index = getIndex(indexName);
1976            if (index != null) {
1977                // remove the index
1978                removeSearchIndex(index);
1979            } else {
1980                if (LOG.isWarnEnabled()) {
1981                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1982                }
1983            }
1984        }
1985    }
1986
1987    /**
1988     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
1989     *
1990     * @param indexsource the indexsource to remove from the configuration
1991     *
1992     * @return true if remove was successful, false if preconditions for removal are ok but the given
1993     *         searchindex was unknown to the manager.
1994     *
1995     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
1996     *         <code>{@link CmsSearchIndex}</code>.
1997     *
1998     */
1999    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2000
2001        // validation if removal will be granted
2002        Iterator<CmsSearchIndex> itIndexes = m_indexes.iterator();
2003        CmsSearchIndex idx;
2004        // the list for collecting indexes that use the given index source
2005        List<CmsSearchIndex> referrers = new ArrayList<CmsSearchIndex>();
2006        // the current list of referred index sources of the iterated index
2007        List<CmsSearchIndexSource> refsources;
2008        while (itIndexes.hasNext()) {
2009            idx = itIndexes.next();
2010            refsources = idx.getSources();
2011            if (refsources != null) {
2012                if (refsources.contains(indexsource)) {
2013                    referrers.add(idx);
2014                }
2015            }
2016        }
2017        if (referrers.size() > 0) {
2018            throw new CmsIllegalStateException(
2019                Messages.get().container(
2020                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2021                    indexsource.getName(),
2022                    referrers.toString()));
2023        }
2024
2025        // remove operation (no exception)
2026        return m_indexSources.remove(indexsource.getName()) != null;
2027
2028    }
2029
2030    /**
2031     * Resumes offline indexing if it was paused.<p>
2032     */
2033    public void resumeOfflineIndexing() {
2034
2035        if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
2036            setOfflineUpdateFrequency(
2037                m_configuredOfflineIndexingFrequency > 0
2038                ? m_configuredOfflineIndexingFrequency
2039                : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2040        }
2041    }
2042
2043    /**
2044     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2045     *
2046     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2047     */
2048    public void setDirectory(String value) {
2049
2050        m_path = value;
2051    }
2052
2053    /**
2054     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2055     *
2056     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2057     */
2058    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2059
2060        m_extractionCacheMaxAge = extractionCacheMaxAge;
2061    }
2062
2063    /**
2064     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2065     *
2066     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2067     */
2068    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2069
2070        try {
2071            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2072        } catch (NumberFormatException e) {
2073            LOG.error(
2074                Messages.get().getBundle().key(
2075                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2076                    extractionCacheMaxAge,
2077                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2078                e);
2079            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2080        }
2081    }
2082
2083    /**
2084     * Sets the unlock mode during indexing.<p>
2085     *
2086     * @param value the value
2087     */
2088    public void setForceunlock(String value) {
2089
2090        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2091    }
2092
2093    /**
2094     * Sets the highlighter.<p>
2095     *
2096     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2097     *
2098     * @param highlighter the package/class name of the highlighter
2099     */
2100    public void setHighlighter(String highlighter) {
2101
2102        try {
2103            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2104        } catch (Exception e) {
2105            m_highlighter = null;
2106            LOG.error(e.getLocalizedMessage(), e);
2107        }
2108    }
2109
2110    /**
2111     * Sets the seconds to wait for an index lock during an update operation.<p>
2112     *
2113     * @param value the seconds to wait for an index lock during an update operation
2114     */
2115    public void setIndexLockMaxWaitSeconds(int value) {
2116
2117        m_indexLockMaxWaitSeconds = value;
2118    }
2119
2120    /**
2121     * Sets the max. excerpt length.<p>
2122     *
2123     * @param maxExcerptLength the max. excerpt length to set
2124     */
2125    public void setMaxExcerptLength(int maxExcerptLength) {
2126
2127        m_maxExcerptLength = maxExcerptLength;
2128    }
2129
2130    /**
2131     * Sets the max. excerpt length as a String.<p>
2132     *
2133     * @param maxExcerptLength the max. excerpt length to set
2134     */
2135    public void setMaxExcerptLength(String maxExcerptLength) {
2136
2137        try {
2138            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2139        } catch (Exception e) {
2140            LOG.error(
2141                Messages.get().getBundle().key(
2142                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2143                    maxExcerptLength,
2144                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2145                e);
2146            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2147        }
2148    }
2149
2150    /**
2151     * Sets the maximal wait time for offline index updates after edit operations.<p>
2152     *
2153     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2154     */
2155    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2156
2157        m_maxIndexWaitTime = maxIndexWaitTime;
2158    }
2159
2160    /**
2161     * Sets the maximal wait time for offline index updates after edit operations.<p>
2162     *
2163     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2164     */
2165    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2166
2167        try {
2168            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2169        } catch (Exception e) {
2170            LOG.error(
2171                Messages.get().getBundle().key(
2172                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2173                    maxIndexWaitTime,
2174                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2175                e);
2176            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2177        }
2178    }
2179
2180    /**
2181     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2182     *
2183     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2184     */
2185    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2186
2187        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2188    }
2189
2190    /**
2191     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2192     *
2193     * @param value the maximum number of modifications to set
2194     */
2195    public void setMaxModificationsBeforeCommit(String value) {
2196
2197        try {
2198            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2199        } catch (Exception e) {
2200            LOG.error(
2201                Messages.get().getBundle().key(
2202                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2203                    value,
2204                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2205                e);
2206            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2207        }
2208    }
2209
2210    /**
2211     * Sets the update frequency of the offline indexer in milliseconds.<p>
2212     *
2213     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2214     */
2215    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2216
2217        m_offlineUpdateFrequency = offlineUpdateFrequency;
2218        updateOfflineIndexes(0);
2219    }
2220
2221    /**
2222     * Sets the update frequency of the offline indexer in milliseconds.<p>
2223     *
2224     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2225     */
2226    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2227
2228        try {
2229            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2230        } catch (Exception e) {
2231            LOG.error(
2232                Messages.get().getBundle().key(
2233                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2234                    offlineUpdateFrequency,
2235                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2236                e);
2237            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2238        }
2239    }
2240
2241    /**
2242     * Sets the Solr configuration.<p>
2243     *
2244     * @param config the Solr configuration
2245     */
2246    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2247
2248        m_solrConfig = config;
2249    }
2250
2251    /**
2252     * Sets the timeout to abandon threads indexing a resource.<p>
2253     *
2254     * @param value the timeout in milliseconds
2255     */
2256    public void setTimeout(long value) {
2257
2258        m_timeout = value;
2259    }
2260
2261    /**
2262     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2263     *
2264     * @param value the timeout in milliseconds
2265     */
2266    public void setTimeout(String value) {
2267
2268        try {
2269            setTimeout(Long.parseLong(value));
2270        } catch (Exception e) {
2271            LOG.error(
2272                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2273                e);
2274            setTimeout(DEFAULT_TIMEOUT);
2275        }
2276    }
2277
2278    /**
2279     * Shuts down the search manager.<p>
2280     *
2281     * This will cause all search indices to be shut down.<p>
2282     */
2283    public void shutDown() {
2284
2285        if (m_offlineIndexThread != null) {
2286            m_offlineIndexThread.shutDown();
2287        }
2288
2289        if (m_offlineHandler != null) {
2290            OpenCms.removeCmsEventListener(m_offlineHandler);
2291        }
2292
2293        Iterator<CmsSearchIndex> i = m_indexes.iterator();
2294        while (i.hasNext()) {
2295            CmsSearchIndex index = i.next();
2296            index.shutDown();
2297            index = null;
2298        }
2299        m_indexes.clear();
2300
2301        shutDownSolrContainer();
2302
2303        if (CmsLog.INIT.isInfoEnabled()) {
2304            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2305        }
2306    }
2307
2308    /**
2309     * Updates all offline indexes.<p>
2310     *
2311     * Can be used to force an index update when it's not convenient to wait until the
2312     * offline update interval has eclipsed.<p>
2313     *
2314     * Since the offline indexes still need some time to update the new resources,
2315     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2316     * to ensure that updating is finished.
2317     *
2318     * @see #updateOfflineIndexes(long)
2319     *
2320     */
2321    public void updateOfflineIndexes() {
2322
2323        updateOfflineIndexes(getMaxIndexWaitTime());
2324    }
2325
2326    /**
2327     * Updates all offline indexes.<p>
2328     *
2329     * Can be used to force an index update when it's not convenient to wait until the
2330     * offline update interval has eclipsed.<p>
2331     *
2332     * Since the offline index will still need some time to update the new resources even if it runs directly,
2333     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2334     *
2335     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2336     */
2337    public void updateOfflineIndexes(long waitTime) {
2338
2339        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2340            // notify existing thread of update frequency change
2341            if (LOG.isDebugEnabled()) {
2342                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2343            }
2344            m_offlineIndexThread.interrupt();
2345            if (waitTime > 0) {
2346                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2347            }
2348        }
2349    }
2350
2351    /**
2352     * Cleans up the extraction result cache.<p>
2353     */
    protected void cleanExtractionCache() {

        // delegate to the extraction result cache, passing the configured maximum age
        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
    }
2359
2360    /**
     * Collects the container pages related to the resources that have been published.<p>
2362     *
2363     * @param adminCms an OpenCms user context with Admin permissions
2364     * @param updateResources the resources to be re-indexed
2365     *
     * @return the updated list of resources to re-index
2367     */
2368    protected List<CmsPublishedResource> findRelatedContainerPages(
2369        CmsObject adminCms,
2370        List<CmsPublishedResource> updateResources) {
2371
2372        Set<CmsResource> elementGroups = new HashSet<CmsResource>();
2373        Set<CmsResource> containerPages = new HashSet<CmsResource>();
2374        int containerPageTypeId = -1;
2375        try {
2376            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
2377        } catch (CmsLoaderException e) {
2378            // will happen during setup, when container page type is not available yet
2379            LOG.info(e.getLocalizedMessage(), e);
2380        }
2381        if (containerPageTypeId != -1) {
2382            for (CmsPublishedResource pubRes : updateResources) {
2383                try {
2384                    if (OpenCms.getResourceManager().getResourceType(
2385                        pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
2386                        CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
2387                        filter.filterStrong();
2388                        List<CmsRelation> relations = adminCms.readRelations(filter);
2389                        for (CmsRelation relation : relations) {
2390                            CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2391                            if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2392                                containerPages.add(res);
2393                                if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) {
2394                                    addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2395                                }
2396                            } else if (OpenCms.getResourceManager().getResourceType(
2397                                res.getTypeId()).getTypeName().equals(
2398                                    CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) {
2399                                elementGroups.add(res);
2400                            }
2401                        }
2402                    }
2403                    if (containerPageTypeId == pubRes.getType()) {
2404                        addDetailContent(
2405                            adminCms,
2406                            containerPages,
2407                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
2408                    }
2409                } catch (CmsException e) {
2410                    LOG.error(e.getLocalizedMessage(), e);
2411                }
2412            }
2413            for (CmsResource pubRes : elementGroups) {
2414                try {
2415                    CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
2416                    filter.filterStrong();
2417                    List<CmsRelation> relations = adminCms.readRelations(filter);
2418                    for (CmsRelation relation : relations) {
2419                        CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2420                        if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2421                            containerPages.add(res);
2422                            if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) {
2423                                addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2424                            }
2425                        }
2426                    }
2427                } catch (CmsException e) {
2428                    LOG.error(e.getLocalizedMessage(), e);
2429                }
2430            }
2431            // add all found container pages as published resource objects to the list
2432            for (CmsResource page : containerPages) {
2433                CmsPublishedResource pubCont = new CmsPublishedResource(page);
2434                if (!updateResources.contains(pubCont)) {
2435                    // ensure container page is added only once
2436                    updateResources.add(pubCont);
2437                }
2438            }
2439        }
2440        return updateResources;
2441    }
2442
2443    /**
     * Returns the list of names of all configured document types.<p>
     *
     * @return the list of names of all configured document types
2447     */
2448    protected List<String> getDocumentTypes() {
2449
2450        List<String> names = new ArrayList<String>();
2451        for (Iterator<I_CmsDocumentFactory> i = m_documentTypes.values().iterator(); i.hasNext();) {
2452            I_CmsDocumentFactory factory = i.next();
2453            names.add(factory.getName());
2454        }
2455        return names;
2456    }
2457
2458    /**
     * Returns an offline project used for offline indexing.<p>
2460     *
2461     * @return the offline project if available
2462     */
2463    protected CmsProject getOfflineIndexProject() {
2464
2465        CmsProject result = null;
2466        for (CmsSearchIndex index : m_offlineIndexes) {
2467            try {
2468                result = m_adminCms.readProject(index.getProject());
2469
2470                if (!result.isOnlineProject()) {
2471                    break;
2472                }
2473            } catch (Exception e) {
2474                // may be a missconfigured index, ignore
2475                LOG.error(e.getLocalizedMessage(), e);
2476            }
2477        }
2478        return result;
2479    }
2480
2481    /**
2482     * Returns a new thread manager for the indexing threads.<p>
2483     *
2484     * @return a new thread manager for the indexing threads
2485     */
    protected CmsIndexingThreadManager getThreadManager() {

        // a fresh instance is created on every call, built from the manager's
        // m_timeout and m_maxModificationsBeforeCommit settings
        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
    }
2490
2491    /**
2492     * Initializes the available Cms resource types to be indexed.<p>
2493     *
2494     * A map stores document factories keyed by a string representing
2495     * a colon separated list of Cms resource types and/or mimetypes.<p>
2496     *
2497     * The keys of this map are used to trigger a document factory to convert
2498     * a Cms resource into a Lucene index document.<p>
2499     *
2500     * A document factory is a class implementing the interface
2501     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2502     */
2503    protected void initAvailableDocumentTypes() {
2504
2505        CmsSearchDocumentType documenttype = null;
2506        String className = null;
2507        String name = null;
2508        I_CmsDocumentFactory documentFactory = null;
2509        List<String> resourceTypes = null;
2510        List<String> mimeTypes = null;
2511        Class<?> c = null;
2512
2513        m_documentTypes = new HashMap<String, I_CmsDocumentFactory>();
2514
2515        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2516
2517            documenttype = m_documentTypeConfigs.get(i);
2518            name = documenttype.getName();
2519
2520            try {
2521                className = documenttype.getClassName();
2522                resourceTypes = documenttype.getResourceTypes();
2523                mimeTypes = documenttype.getMimeTypes();
2524
2525                if (name == null) {
2526                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2527                }
2528                if (className == null) {
2529                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2530                }
2531                if (resourceTypes.size() == 0) {
2532                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2533                }
2534
2535                try {
2536                    c = Class.forName(className);
2537                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2538                        new Object[] {name});
2539                } catch (ClassNotFoundException exc) {
2540                    throw new CmsIndexException(
2541                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2542                        exc);
2543                } catch (Exception exc) {
2544                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2545                }
2546
2547                if (documentFactory.isUsingCache()) {
2548                    // init cache if used by the factory
2549                    documentFactory.setCache(m_extractionResultCache);
2550                }
2551
2552                for (Iterator<String> key = documentFactory.getDocumentKeys(
2553                    resourceTypes,
2554                    mimeTypes).iterator(); key.hasNext();) {
2555                    m_documentTypes.put(key.next(), documentFactory);
2556                }
2557
2558            } catch (CmsException e) {
2559                if (LOG.isWarnEnabled()) {
2560                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2561                }
2562            }
2563        }
2564    }
2565
2566    /**
2567     * Initializes the configured search indexes.<p>
2568     *
2569     * This initializes also the list of Cms resources types
2570     * to be indexed by an index source.<p>
2571     */
2572    protected void initSearchIndexes() {
2573
2574        CmsSearchIndex index = null;
2575        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2576            index = m_indexes.get(i);
2577            // reset disabled flag
2578            index.setEnabled(true);
2579            // check if the index has been configured correctly
2580            if (index.checkConfiguration(m_adminCms)) {
2581                // the index is configured correctly
2582                try {
2583                    index.initialize();
2584                } catch (Exception e) {
2585                    if (CmsLog.INIT.isWarnEnabled()) {
2586                        // in this case the index will be disabled
2587                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2588                    }
2589                }
2590            }
2591            // output a log message if the index was successfully configured or not
2592            if (CmsLog.INIT.isInfoEnabled()) {
2593                if (index.isEnabled()) {
2594                    CmsLog.INIT.info(
2595                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2596                } else {
2597                    CmsLog.INIT.warn(
2598                        Messages.get().getBundle().key(
2599                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2600                            index,
2601                            index.getProject()));
2602                }
2603            }
2604        }
2605    }
2606
2607    /**
2608     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2609     * after resources have been published.<p>
2610     *
2611     * @param adminCms an OpenCms user context with Admin permissions
2612     * @param publishHistoryId the history ID of the published project
2613     * @param report the report to write the output to
2614     */
2615    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2616
2617        int oldPriority = Thread.currentThread().getPriority();
2618        try {
2619            SEARCH_MANAGER_LOCK.lock();
2620            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2621            List<CmsPublishedResource> publishedResources;
2622            try {
2623                // read the list of all published resources
2624                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2625            } catch (CmsException e) {
2626                LOG.error(
2627                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2628                    e);
2629                return;
2630            }
2631            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2632            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2633
2634            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2635            for (CmsPublishedResource res : publishedResources) {
2636                if (res.isFolder() || res.getState().isUnchanged()) {
2637                    // folders and unchanged resources don't need to be indexed after publish
2638                    continue;
2639                }
2640                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2641                    if (updateResources.contains(res)) {
2642                        // resource may have been added as a sibling of another resource
2643                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2644                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
2645                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
2646                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
2647                        // check it this is a moved resource with source / target info, in this case we need both entries
2648                        if (!hasMoved) {
2649                            // if the resource was moved, we must contain both entries
2650                            updateResources.remove(res);
2651                        }
2652                        // "equals()" implementation of published resource checks for id,
2653                        // so the removed value may have a different "deleted" or "modified" status value
2654                        updateResources.add(res);
2655                    } else {
2656                        // resource not yet contained in the list
2657                        updateResources.add(res);
2658                        // check for the siblings (not for deleted resources, these are already gone)
2659                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
2660                            // this resource has siblings
2661                            try {
2662                                // read siblings from the online project
2663                                List<CmsResource> siblings = adminCms.readSiblings(
2664                                    res.getRootPath(),
2665                                    CmsResourceFilter.ALL);
2666                                Iterator<CmsResource> itSib = siblings.iterator();
2667                                while (itSib.hasNext()) {
2668                                    // check all siblings
2669                                    CmsResource sibling = itSib.next();
2670                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
2671                                    if (!updateResources.contains(sib)) {
2672                                        // ensure sibling is added only once
2673                                        updateResources.add(sib);
2674                                    }
2675                                }
2676                            } catch (CmsException e) {
2677                                // ignore, just use the original resource
2678                                if (LOG.isWarnEnabled()) {
2679                                    LOG.warn(
2680                                        Messages.get().getBundle().key(
2681                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
2682                                            res.getRootPath()),
2683                                        e);
2684                                }
2685                            }
2686                        }
2687                    }
2688                }
2689            }
2690
2691            findRelatedContainerPages(adminCms, updateResources);
2692            if (!updateResources.isEmpty()) {
2693                // sort the resource to update
2694                Collections.sort(updateResources);
2695                // only update the indexes if the list of remaining published resources is not empty
2696                Iterator<CmsSearchIndex> i = m_indexes.iterator();
2697                while (i.hasNext()) {
2698                    CmsSearchIndex index = i.next();
2699                    if (CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
2700                        // only update indexes which have the rebuild mode set to "auto"
2701                        try {
2702                            updateIndex(index, report, updateResources);
2703                        } catch (CmsException e) {
2704                            LOG.error(
2705                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
2706                                e);
2707                        }
2708                    }
2709                }
2710            }
2711            // clean up the extraction result cache
2712            cleanExtractionCache();
2713        } finally {
2714            SEARCH_MANAGER_LOCK.unlock();
2715            Thread.currentThread().setPriority(oldPriority);
2716        }
2717    }
2718
2719    /**
2720     * Updates (if required creates) the index with the given name.<p>
2721     *
2722     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
2723     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
     * the index will be fully rebuilt.<p>
2725     *
2726     * @param index the index to update or rebuild
2727     * @param report the report to write output messages to
2728     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
2729     *
2730     * @throws CmsException if something goes wrong
2731     */
2732    protected void updateIndex(CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
2733    throws CmsException {
2734
2735        try {
2736            SEARCH_MANAGER_LOCK.lock();
2737
2738            // copy the stored admin context for the indexing
2739            CmsObject cms = OpenCms.initCmsObject(m_adminCms);
2740            // make sure a report is available
2741            if (report == null) {
2742                report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
2743            }
2744
2745            // check if the index has been configured correctly
2746            if (!index.checkConfiguration(cms)) {
2747                // the index is disabled
2748                return;
2749            }
2750
2751            // set site root and project for this index
2752            cms.getRequestContext().setSiteRoot("/");
2753            // switch to the index project
2754            cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
2755
2756            if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
2757                // rebuild the complete index
2758
2759                // create a new thread manager for the indexing threads
2760                CmsIndexingThreadManager threadManager = getThreadManager();
2761
2762                boolean isOfflineIndex = false;
2763                if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
2764                    // disable offline indexing while the complete index is rebuild
2765                    isOfflineIndex = true;
2766                    index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_MANUAL);
2767                    // re-initialize the offline indexes, this will disable this offline index
2768                    initOfflineIndexes();
2769                }
2770
2771                I_CmsIndexWriter writer = null;
2772                try {
2773                    // create a backup of the existing index
2774                    String backup = index.createIndexBackup();
2775                    if (backup != null) {
2776                        index.indexSearcherOpen(backup);
2777                    }
2778
2779                    // create a new index writer
2780                    writer = index.getIndexWriter(report, true);
2781                    if (writer instanceof CmsSolrIndexWriter) {
2782                        try {
2783                            ((CmsSolrIndexWriter)writer).deleteAllDocuments();
2784                        } catch (IOException e) {
2785                            LOG.error(e.getMessage(), e);
2786                        }
2787                    }
2788
2789                    // output start information on the report
2790                    report.println(
2791                        Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
2792                        I_CmsReport.FORMAT_HEADLINE);
2793
2794                    // iterate all configured index sources of this index
2795                    Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
2796                    while (sources.hasNext()) {
2797                        // get the next index source
2798                        CmsSearchIndexSource source = sources.next();
2799                        // create the indexer
2800                        I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
2801                        // new index creation, use all resources from the index source
2802                        indexer.rebuildIndex(writer, threadManager, source);
2803
2804                        // wait for indexing threads to finish
2805                        while (threadManager.isRunning()) {
2806                            try {
2807                                Thread.sleep(500);
2808                            } catch (InterruptedException e) {
2809                                // just continue with the loop after interruption
2810                                LOG.info(e.getLocalizedMessage(), e);
2811                            }
2812                        }
2813
2814                        // commit and optimize the index after each index source has been finished
2815                        try {
2816                            writer.commit();
2817                        } catch (IOException e) {
2818                            if (LOG.isWarnEnabled()) {
2819                                LOG.warn(
2820                                    Messages.get().getBundle().key(
2821                                        Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
2822                                        index.getName(),
2823                                        index.getPath()),
2824                                    e);
2825                            }
2826                        }
2827                        try {
2828                            writer.optimize();
2829                        } catch (IOException e) {
2830                            if (LOG.isWarnEnabled()) {
2831                                LOG.warn(
2832                                    Messages.get().getBundle().key(
2833                                        Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
2834                                        index.getName(),
2835                                        index.getPath()),
2836                                    e);
2837                            }
2838                        }
2839                    }
2840
2841                    if (backup != null) {
2842                        // remove the backup after the files have been re-indexed
2843                        index.indexSearcherClose();
2844                        index.removeIndexBackup(backup);
2845                    }
2846
2847                    // output finish information on the report
2848                    report.println(
2849                        Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
2850                        I_CmsReport.FORMAT_HEADLINE);
2851
2852                } finally {
2853                    if (writer != null) {
2854                        try {
2855                            writer.close();
2856                        } catch (IOException e) {
2857                            if (LOG.isWarnEnabled()) {
2858                                LOG.warn(
2859                                    Messages.get().getBundle().key(
2860                                        Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
2861                                        index.getPath(),
2862                                        index.getName()),
2863                                    e);
2864                            }
2865                        }
2866                    }
2867                    if (isOfflineIndex) {
2868                        // reset the mode of the offline index
2869                        index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_OFFLINE);
2870                        // re-initialize the offline indexes, this will re-enable this index
2871                        initOfflineIndexes();
2872                    }
2873                    // index has changed - initialize the index searcher instance
2874                    index.indexSearcherOpen(index.getPath());
2875                }
2876
2877                // show information about indexing runtime
2878                threadManager.reportStatistics(report);
2879
2880            } else {
2881                updateIndexIncremental(cms, index, report, resourcesToIndex);
2882            }
2883        } finally {
2884            SEARCH_MANAGER_LOCK.unlock();
2885        }
2886    }
2887
2888    /**
2889     * Incrementally updates the given index.<p>
2890     *
2891     * @param cms the OpenCms user context to use for accessing the VFS
2892     * @param index the index to update
2893     * @param report the report to write output messages to
2894     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
2895     *
2896     * @throws CmsException if something goes wrong
2897     */
2898    protected void updateIndexIncremental(
2899        CmsObject cms,
2900        CmsSearchIndex index,
2901        I_CmsReport report,
2902        List<CmsPublishedResource> resourcesToIndex)
2903    throws CmsException {
2904
2905        try {
2906            SEARCH_MANAGER_LOCK.lock();
2907
2908            // update the existing index
2909            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
2910
2911            boolean hasResourcesToDelete = false;
2912            boolean hasResourcesToUpdate = false;
2913
2914            // iterate all configured index sources of this index
2915            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
2916            while (sources.hasNext()) {
2917                // get the next index source
2918                CmsSearchIndexSource source = sources.next();
2919                // create the indexer
2920                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
2921                // collect the resources to update
2922                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
2923                if (!updateData.isEmpty()) {
2924                    // add the update collection to the internal pipeline
2925                    updateCollections.add(updateData);
2926                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
2927                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
2928                }
2929            }
2930
2931            // only start index modification if required
2932            if (hasResourcesToDelete || hasResourcesToUpdate) {
2933                // output start information on the report
2934                report.println(
2935                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
2936                    I_CmsReport.FORMAT_HEADLINE);
2937
2938                I_CmsIndexWriter writer = null;
2939                try {
2940                    // obtain an index writer that updates the current index
2941                    writer = index.getIndexWriter(report, false);
2942
2943                    if (hasResourcesToDelete) {
2944                        // delete the resource from the index
2945                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
2946                        while (i.hasNext()) {
2947                            CmsSearchIndexUpdateData updateCollection = i.next();
2948                            if (updateCollection.hasResourcesToDelete()) {
2949                                updateCollection.getIndexer().deleteResources(
2950                                    writer,
2951                                    updateCollection.getResourcesToDelete());
2952                            }
2953                        }
2954                    }
2955
2956                    if (hasResourcesToUpdate) {
2957                        // create a new thread manager
2958                        CmsIndexingThreadManager threadManager = getThreadManager();
2959
2960                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
2961                        while (i.hasNext()) {
2962                            CmsSearchIndexUpdateData updateCollection = i.next();
2963                            if (updateCollection.hasResourceToUpdate()) {
2964                                updateCollection.getIndexer().updateResources(
2965                                    writer,
2966                                    threadManager,
2967                                    updateCollection.getResourcesToUpdate());
2968                            }
2969                        }
2970
2971                        // wait for indexing threads to finish
2972                        while (threadManager.isRunning()) {
2973                            try {
2974                                Thread.sleep(500);
2975                            } catch (InterruptedException e) {
2976                                // just continue with the loop after interruption
2977                                LOG.info(e.getLocalizedMessage(), e);
2978                            }
2979                        }
2980                    }
2981                } finally {
2982                    // close the index writer
2983                    if (writer != null) {
2984                        try {
2985                            writer.commit();
2986                        } catch (IOException e) {
2987                            LOG.error(
2988                                Messages.get().getBundle().key(
2989                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
2990                                    index.getName(),
2991                                    index.getPath()),
2992                                e);
2993                        }
2994                    }
2995                    // index has changed - initialize the index searcher instance
2996                    index.indexSearcherUpdate();
2997                }
2998
2999                // output finish information on the report
3000                report.println(
3001                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3002                    I_CmsReport.FORMAT_HEADLINE);
3003            }
3004        } finally {
3005            SEARCH_MANAGER_LOCK.unlock();
3006        }
3007    }
3008
3009    /**
3010     * Updates the offline search indexes for the given list of resources.<p>
3011     *
3012     * @param report the report to write the index information to
3013     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3014     */
3015    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3016
3017        CmsObject cms = m_adminCms;
3018        try {
3019            // copy the administration context for the indexing
3020            cms = OpenCms.initCmsObject(m_adminCms);
3021            // set site root and project for this index
3022            cms.getRequestContext().setSiteRoot("/");
3023        } catch (CmsException e) {
3024            LOG.error(e.getLocalizedMessage(), e);
3025        }
3026
3027        Iterator<CmsSearchIndex> j = m_offlineIndexes.iterator();
3028        while (j.hasNext()) {
3029            CmsSearchIndex index = j.next();
3030            if (index.getSources() != null) {
3031                try {
3032                    // switch to the index project
3033                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3034                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3035                } catch (CmsException e) {
3036                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3037                }
3038            }
3039        }
3040    }
3041
3042    /**
3043     * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p>
3044     *
3045     * @param adminCms the cms context
3046     * @param containerPages the containerpages
3047     * @param containerPage the container page site path
3048     */
3049    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3050
3051        if (CmsJspTagContainer.isDetailContainersPage(adminCms, containerPage)) {
3052
3053            try {
3054                CmsResource detailRes = adminCms.readResource(
3055                    CmsJspTagContainer.getDetailContentPath(containerPage),
3056                    CmsResourceFilter.IGNORE_EXPIRATION);
3057                containerPages.add(detailRes);
3058            } catch (Throwable e) {
3059                if (LOG.isWarnEnabled()) {
3060                    LOG.warn(e.getLocalizedMessage(), e);
3061                }
3062            }
3063        }
3064    }
3065
3066    /**
3067     * Creates the Solr core container.<p>
3068     *
3069     * @return the created core container
3070     */
3071    private CoreContainer createCoreContainer() {
3072
3073        CoreContainer container = null;
3074        try {
3075            // get the core container
3076            // still no core container: create it
3077            container = CoreContainer.createAndLoad(
3078                Paths.get(m_solrConfig.getHome()),
3079                m_solrConfig.getSolrFile().toPath());
3080            if (CmsLog.INIT.isInfoEnabled()) {
3081                CmsLog.INIT.info(
3082                    Messages.get().getBundle().key(
3083                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3084                        m_solrConfig.getHome(),
3085                        m_solrConfig.getSolrFile().getName()));
3086            }
3087        } catch (Exception e) {
3088            LOG.error(
3089                Messages.get().getBundle().key(
3090                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3091                    m_solrConfig.getSolrFile().getAbsolutePath()),
3092                e);
3093        }
3094        return container;
3095
3096    }
3097
3098    /**
3099     * Returns the report in the given event data, if <code>null</code>
3100     * a new log report is used.<p>
3101     *
3102     * @param event the event to get the report for
3103     *
3104     * @return the report
3105     */
3106    private I_CmsReport getEventReport(CmsEvent event) {
3107
3108        I_CmsReport report = null;
3109        if (event.getData() != null) {
3110            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3111        }
3112        if (report == null) {
3113            report = new CmsLogReport(Locale.ENGLISH, getClass());
3114        }
3115        return report;
3116    }
3117
3118    /**
3119     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3120     *
3121     * @param publishedResources a list of published resources
3122     *
3123     * @return the set of structure ids that satisfy the condition above
3124     */
3125    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3126        List<CmsPublishedResource> publishedResources) {
3127
3128        Set<CmsUUID> result = new HashSet<CmsUUID>();
3129        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3130        for (CmsPublishedResource pubRes : publishedResources) {
3131            if (pubRes.getState().isNew()) {
3132                result.add(pubRes.getStructureId());
3133            }
3134            if (pubRes.getState().isDeleted()) {
3135                deletedSet.add(pubRes.getStructureId());
3136            }
3137        }
3138        result.retainAll(deletedSet);
3139        return result;
3140    }
3141
3142    /**
3143     * Shuts down the Solr core container.<p>
3144     */
3145    private void shutDownSolrContainer() {
3146
3147        if (m_coreContainer != null) {
3148            for (SolrCore core : m_coreContainer.getCores()) {
3149                // do not unload spellcheck core because otherwise the core.properties file is removed
3150                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3151                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3152                    m_coreContainer.unload(core.getName(), false, false, true);
3153                }
3154            }
3155            m_coreContainer.shutdown();
3156            if (CmsLog.INIT.isInfoEnabled()) {
3157                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3158            }
3159            m_coreContainer = null;
3160        }
3161    }
3162
3163}