froe - Code Search

fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/entity/OpenSearchAccessResult.java

    // Starts out false. NOTE(review): presumably flipped once the access result's
    // data has been loaded/initialized — usage is not visible here, confirm.
    private boolean initializedData = false;

    /**
     * Initializes the access result with response data and result data.
     *
     * @param responseData The response data from the crawl operation.
     * @param resultData The result data from content processing.
     */
    @Override
    public void init(final ResponseData responseData, final ResultData resultData) {

        setCreateTime(System.currentTimeMillis());

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 6.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

        }
        return null;
    }

    /**
     * Extracts URLs from HTML tag attributes using XPath.
     *
     * @param url the base URL for resolving relative URLs
     * @param document the document to extract URLs from
     * @param xpath the XPath expression to select elements
     * @param attr the attribute name to extract URLs from
     * @param encoding the character encoding to use

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 28.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/net/protocol/storage/Handler.java

     * This class handles the authentication, connection management, and data retrieval
     * from storage buckets and objects.
     *
     * <p>
     * The connection extracts bucket and object names from the URL and uses environment
     * variables for authentication and endpoint configuration.
     * </p>
     */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 10.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

 * </p>
 *
 * <p>
 * The crawling process involves the following steps:
 * </p>
 * <ol>
 *   <li>Fetching a URL from the queue using {@link UrlQueueService#poll(String)}.</li>
 *   <li>Checking if the URL is valid using {@link #isValid(UrlQueue)}.</li>
 *   <li>Accessing the content using a {@link CrawlerClient} obtained from {@link CrawlerClientFactory}.</li>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 20.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/client/ftp/FtpClient.java

        return processRequest(uri, true);
    }

    /**
     * Processes an FTP request to retrieve data from the specified URI.
     * This method handles the complete FTP request lifecycle including timeout management,
     * connection setup, and data retrieval.
     *
     * @param uri The URI to retrieve data from
     * @param includeContent Whether to include the actual content in the response

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 39.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/pool/CrawlerPooledObjectFactory.java

    /**
     * The name of the component to be retrieved from the CrawlerContainer.
     */
    protected String componentName;

    /**
     * The listener that is called when a pooled object is destroyed.
     */
    protected OnDestroyListener<T> onDestroyListener;

    /**
     * Creates a new object instance from the crawler container.

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 3.9K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

/**
 * Extracts text content from Microsoft PowerPoint documents.
 */
public class MsPowerPointExtractor extends AbstractExtractor {

    /**
     * Creates a new MsPowerPointExtractor instance.
     */
    public MsPowerPointExtractor() {
        super();
    }

    /**
     * Extracts text from the PowerPoint input stream.
     * @param in The input stream.

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 2.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java

 * and registration with the ExtractorFactory.
 *
 * <p>
 * This class handles the retrieval of essential crawler components like
 * {@link MimeTypeHelper} and {@link ExtractorFactory} from the
 * {@link CrawlerContainer}. It also provides a convenient method for
 * registering the extractor with the {@link ExtractorFactory}.
 * </p>
 *
 * <p>

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 4.2K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

 *   <li><b>trimSpaceEnabled:</b> A flag to enable or disable trimming of whitespace from extracted values.</li>
 *   <li><b>charsetName:</b> The character encoding for the output XML.</li>
 *   <li><b>dataClass:</b> The class type to return from the {@link #getData(AccessResultData)} method.</li>
 * </ul>
 *
 */
public class XpathTransformer extends HtmlTransformer {

    /**

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 13.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/service/UrlFilterService.java

     * @param url the URL to be excluded
     */
    void addExcludeUrlFilter(String sessionId, String url);

    /**
     * Adds a list of URLs to be excluded from crawling for a specific session.
     *
     * @param sessionId the ID of the session for which the URLs should be excluded
     * @param urlList the list of URLs to be excluded
     */

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 3.1K bytes

- Viewed (0)

Search Options