- Sort Score
- Result 10 results
- Languages All
Results 21 - 30 of 44 for Extract (0.03 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java
} } } throw new ExtractException("Failed to extract the content using available extractors."); } @Override public int getWeight() { return extractors[0].getWeight(); } }; }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 7.3K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/suggest/util/SuggestUtil.java
} } return keywords.toArray(new String[keywords.size()]); } /** * Extracts keywords from the given query string based on the specified fields. * * @param q the query string to parse and extract keywords from * @param fields the fields to consider when extracting keywords * @return a list of unique keywords extracted from the query string
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Mon Sep 01 13:33:03 UTC 2025 - 17.4K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
} return extractData; } catch (final IOException e) { throw new ExtractException("Could not extract a content.", e); } finally { FileUtil.deleteInBackground(inputFile); FileUtil.deleteInBackground(outputFile); } } String getFileName(final String resourceName) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 16K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java
import jakarta.annotation.Resource; /** * <p> * XmlTransformer is a class that extends AbstractTransformer to transform XML documents into a specific format for indexing. * It uses XPath expressions to extract data from the XML and stores it in a ResultData object. * </p> * * <p> * This class provides several configuration options to customize the XML parsing process, such as: * </p> * <ul>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 23.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java
import jakarta.annotation.Resource; /** * {@link HtmlXpathExtractor} is an implementation of the {@link org.codelibs.fess.crawler.extractor.Extractor} interface. * It uses XPath expressions to extract text content from HTML documents. * <p> * This class provides methods to configure the XPath expressions, parser features, and properties. * It also includes caching mechanism for XPathAPI instances to improve performance.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 10.3K bytes - Viewed (0) -
src/test/java/org/codelibs/fess/suggest/util/SuggestUtilTest.java
String[] fields = { "content" }; List<String> keywords = SuggestUtil.getKeywords(query, fields); assertNotNull(keywords); // Should extract all unique terms assertTrue(keywords.size() > 0); } @Test public void testCreateBulkLineWithMinimalItem() { // Test with minimal SuggestItem
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Mon Sep 01 13:33:03 UTC 2025 - 18.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java
httpClientPropertyMap.put(name, value); } } /** * Processes robots.txt for the given URL. * This method fetches and parses the robots.txt file to extract disallow/allow rules * and sitemap information. * * @param url The URL to process robots.txt for */ protected void processRobotsTxt(final String url) { if (StringUtil.isBlank(url)) {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 52.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java
import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts text content from Microsoft PowerPoint documents. */ public class MsPowerPointExtractor extends AbstractExtractor { /** * Creates a new MsPowerPointExtractor instance. */ public MsPowerPointExtractor() { super(); } /** * Extracts text from the PowerPoint input stream. * @param in The input stream.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java
import org.codelibs.fess.crawler.exception.ExtractException; /** * Extracts the filename from the parameters. */ public class FilenameExtractor extends AbstractExtractor { /** * Constructs a new FilenameExtractor. */ public FilenameExtractor() { // Default constructor } /** * Extracts the filename from the parameters. * @param in The input stream (not used).
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java
/** * A response processor implementation that handles sitemaps. * It parses the response body as a SitemapSet, extracts URLs from the sitemaps, * and adds them as child URLs to be crawled. * * <p> * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text. * It then iterates through the sitemaps in the SitemapSet, extracts the URL * from each sitemap, and creates a new {@link RequestData} object for each URL.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 3.4K bytes - Viewed (0)