Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 51 - 60 of 88 for source (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/filter/impl/UrlFilterImpl.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.filter.UrlFilter;
    import org.codelibs.fess.crawler.service.UrlFilterService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlFilter} interface.
     * This class provides functionality to filter URLs based on include and exclude patterns.
     * It uses a {@link UrlFilterService} to manage the URL filtering rules.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java

     * supports dependency injection via the {@link Resource} annotation.
     * </p>
     */
    public class ExtractorFactory {
    
        /** Logger instance for this class */
        private static final Logger logger = LogManager.getLogger(ExtractorFactory.class);
    
        /** Container for managing crawler components */
        @Resource
        protected CrawlerContainer crawlerContainer;
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.3K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientFactory.java

    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.Resource;
    
    /**
     * A factory class for managing and creating crawler clients based on URL patterns.
     * This class implements AutoCloseable to properly handle resource cleanup.
     *
     * <p>The factory maintains a map of regular expression patterns to crawler clients,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
            try {
                @SuppressWarnings("resource")
                final SlideShowExtractor<HSLFShape, HSLFTextParagraph> extractor = new SlideShowExtractor<>(new HSLFSlideShow(in));
                return new ExtractData(extractor.getText());
            } catch (final IOException e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImpl.java

     * </p>
     *
     * <p>
     * Usage:
     * </p>
     * <pre>
     * MimeTypeHelperImpl mimeTypeHelper = new MimeTypeHelperImpl();
     * String contentType = mimeTypeHelper.getContentType(inputStream, filename);
     * </pre>
     */
    public class MimeTypeHelperImpl implements MimeTypeHelper {
        /** The resource name for the MIME types configuration file. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java

            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
            try {
                @SuppressWarnings("resource")
                final PublisherTextExtractor publisherTextExtractor = new PublisherTextExtractor(in);
                return new ExtractData(publisherTextExtractor.getText());
            } catch (final IOException e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlFilterServiceImpl.java

    import java.util.List;
    import java.util.regex.Pattern;
    
    import org.codelibs.fess.crawler.helper.MemoryDataHelper;
    import org.codelibs.fess.crawler.service.UrlFilterService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlFilterService} interface.
     * This class provides methods for managing URL filtering rules,
     * including adding include and exclude URL patterns, deleting patterns,
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.2K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java

    import java.util.List;
    import java.util.Map;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.fess.crawler.container.CrawlerContainer;
    
    import jakarta.annotation.Resource;
    
    /**
     * Creates and manages crawler clients for web crawling operations.
     * This class handles the registration and loading of crawler client factories and their associated clients.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

            final String outExt = extensionMap.get(extension);
            return outExt == null ? "txt" : outExt;
        }
    
        /**
         * Extracts the file name from a resource name.
         *
         * @param resourceName the resource name
         * @return the file name
         */
        private String getFileName(final String resourceName) {
            final String name = resourceName.replaceAll("/+$", "");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

     * This class provides methods to extract text from an input stream, handling different scenarios such as:
     * </p>
     * <ul>
     *   <li>Normalizing text content</li>
     *   <li>Handling resource names and content types</li>
     *   <li>Retrying extraction without resource name or content type if the initial attempt fails</li>
     *   <li>Extracting text from metadata if the main content extraction fails</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
Back to top