Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 20 for USING (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

     * </p>
     * <ol>
     *   <li>Fetching a URL from the queue using {@link UrlQueueService#poll(String)}.</li>
     *   <li>Checking if the URL is valid using {@link #isValid(UrlQueue)}.</li>
     *   <li>Accessing the content using a {@link CrawlerClient} obtained from {@link CrawlerClientFactory}.</li>
     *   <li>Processing the response using a {@link ResponseProcessor} associated with a {@link Rule}.</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

                        if (logger.isDebugEnabled()) {
                            logger.debug("Using default extractor {} for MIME type {}", extractorName, mimeType);
                        }
                        extractor = crawlerContainer.getComponent(extractorName);
                    } else if (logger.isDebugEnabled()) {
                        logger.debug("Using {} for detected MIME type {}, not {}", extractor.getClass().getName(), detectedMimeType, mimeType);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  3. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/AbstractCrawlerService.java

                }
            } else if (logger.isDebugEnabled()) {
                logger.debug("{} mapping exists.", index);
            }
        }
    
        /**
         * Extracts a Date object from the source map using the specified field name.
         * Handles various date formats including Date objects, timestamp numbers, and ISO date strings.
         *
         * @param sourceMap The source map containing the date field.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 34.2K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/smb1/SmbClient.java

    import org.codelibs.jcifs.smb1.util.LogStream;
    
    /**
     * The {@link SmbClient} class is a crawler client implementation for accessing files and directories
     * on SMB (Server Message Block) shares using the SMB1 protocol. It extends {@link AbstractCrawlerClient} and utilizes the JCIFS library
     * to interact with SMB resources.
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Sep 18 09:30:45 UTC 2025
    - 23K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

    import org.xml.sax.SAXException;
    
    import jakarta.annotation.PostConstruct;
    
    /**
     * <p>
     * The {@link TikaExtractor} class is responsible for extracting text content and metadata from various file formats
     * using the Apache Tika library. It extends {@link PasswordBasedExtractor} to handle password-protected files.
     * </p>
     *
     * <p>
     * This class provides methods to extract text from an input stream, handling different scenarios such as:
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

    import org.jodconverter.local.LocalConverter;
    
    import jakarta.annotation.PostConstruct;
    import jakarta.annotation.PreDestroy;
    
    /**
     * Extracts text content from various document formats using JODConverter.
     */
    public class JodExtractor extends AbstractExtractor {
        /** Logger for this class. */
        private static final Logger logger = LogManager.getLogger(JodExtractor.class);
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/storage/Handler.java

    /**
     * Handler for the "storage" protocol, allowing access to objects stored in a MinIO-compatible storage service.
     * This handler extends {@link URLStreamHandler} to provide a way to open connections to storage objects
     * using URLs with the "storage" protocol.
     *
     * <p>
     * The URL format is expected to be: {@code storage://bucketName/objectName}.
     * The bucket name and object name are extracted from the URL.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    
    /**
     * PdfExtractor extracts text content from PDF files using Apache PDFBox.
     * It supports password-protected PDFs and can extract embedded documents and annotations.
     *
     * <p>The extractor runs text extraction in a separate thread with a configurable timeout
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/form/FormScheme.java

    import org.codelibs.fess.crawler.Constants;
    
    /**
     * The FormScheme class implements the AuthScheme interface to provide
     * form-based authentication for HTTP clients. It handles the process of
     * obtaining a token and logging in using the provided credentials.
     *
     * <p>This class supports both GET and POST methods for token and login
     * requests. It also allows for the replacement of placeholders in URLs and
     * parameters with actual credentials.
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14.3K bytes
    - Viewed (1)
  10. README.md

    - **CrawlerContext**: Execution context and configuration
    - **CrawlerThread**: Individual crawler thread implementation
    
    #### Client Architecture
    - **HcHttpClient**: HTTP/HTTPS client using Apache HttpComponents
    - **FileSystemClient**: File system access
    - **FtpClient**: FTP protocol support
    - **SmbClient**: SMB/CIFS network shares
    - **StorageClient**: Cloud storage integration
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
Back to top