Search Options

Results per page
Sort
Preferred Languages
Advance

Results 51 - 60 of 70 for resources (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExtractException.java

     */
    package org.codelibs.fess.crawler.exception;
    
    /**
     * Exception thrown during the extraction process in the crawler.
     * This exception indicates a failure or error that occurred while extracting content from a crawled resource.
     * It extends {@link org.codelibs.fess.crawler.exception.CrawlerSystemException} and provides constructors
     * to handle different scenarios such as wrapping another exception or providing a specific error message.
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 3K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java

            if (in == null) {
                throw new CrawlerSystemException("The inputstream is null.");
            }
            try {
                @SuppressWarnings("resource")
                final org.apache.poi.hssf.extractor.ExcelExtractor excelExtractor =
                        new org.apache.poi.hssf.extractor.ExcelExtractor(new HSSFWorkbook(in));
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java

    import org.codelibs.fess.crawler.exception.CrawlingAccessException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    
    import jakarta.annotation.Resource;
    
    /**
     * TextTransformer is a class that transforms a ResponseData object into a ResultData object containing the extracted text content.
     * It uses an Extractor to extract the text from the response body based on the MIME type.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

    import org.codelibs.fess.crawler.entity.ResultData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    
    /**
     * <p>
     * FileTransformer stores the content of a crawled resource as a file on the file system.
     * It extends HtmlTransformer and provides functionality to:
     * </p>
     * <ul>
     *     <li>Specify a base directory for storing files.</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  5. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/OpenSearchUrlQueueService.java

    import org.opensearch.search.sort.SortBuilders;
    import org.opensearch.search.sort.SortOrder;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    import jakarta.annotation.Resource;
    
    /**
     * OpenSearchUrlQueueService is an implementation of {@link UrlQueueService} for OpenSearch.
     *
     * @author shinsuke
     *
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 17K bytes
    - Viewed (1)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;
    
    import jakarta.annotation.Resource;
    
    /**
     * {@link HtmlXpathExtractor} is an implementation of the {@link org.codelibs.fess.crawler.extractor.Extractor} interface.
     * It uses XPath expressions to extract text content from HTML documents.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlQueueServiceImpl.java

    import org.codelibs.fess.crawler.entity.UrlQueueImpl;
    import org.codelibs.fess.crawler.helper.MemoryDataHelper;
    import org.codelibs.fess.crawler.service.UrlQueueService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlQueueService} interface.
     * This class provides methods for managing a queue of URLs to be crawled,
     * including adding, deleting, and retrieving URLs from the queue.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/AbstractCrawlerClient.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    
    import jakarta.annotation.Resource;
    
    /**
     * Abstract base class for CrawlerClient implementations.
     * Provides common functionality for handling initialization parameters,
     * content length checks, and default method implementations.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 9.7K bytes
    - Viewed (10)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

         */
        public RobotsTxt parse(final InputStream stream, final String charsetName) {
            if (!enabled) {
                return null;
            }
    
            try {
                @SuppressWarnings("resource")
                final BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), charsetName));
    
                String line;
                final RobotsTxt robotsTxt = new RobotsTxt();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/fs/FileSystemClient.java

    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.helper.ContentLengthHelper;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    
    import jakarta.annotation.Resource;
    
    /**
     * FileSystemClient is CrawlerClient implementation to crawl files on a file
     * system.
     *
     * @author shinsuke
     *
     */
    public class FileSystemClient extends AbstractCrawlerClient {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.8K bytes
    - Viewed (0)
Back to top