Search Options

Results per page
Sort
Preferred Languages
Advance

Results 21 - 30 of 44 for Extract (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java

                            }
                        }
                    }
                    throw new ExtractException("Failed to extract the content using available extractors.");
                }
    
                @Override
                public int getWeight() {
                    return extractors[0].getWeight();
                }
            };
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.3K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/suggest/util/SuggestUtil.java

                }
            }
            return keywords.toArray(new String[keywords.size()]);
        }
    
        /**
         * Extracts keywords from the given query string based on the specified fields.
         *
         * @param q the query string to parse and extract keywords from
         * @param fields the fields to consider when extracting keywords
         * @return a list of unique keywords extracted from the query string
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Mon Sep 01 13:33:03 UTC 2025
    - 17.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

                }
    
                return extractData;
            } catch (final IOException e) {
                throw new ExtractException("Could not extract a content.", e);
            } finally {
                FileUtil.deleteInBackground(inputFile);
                FileUtil.deleteInBackground(outputFile);
            }
        }
    
        String getFileName(final String resourceName) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

    import jakarta.annotation.Resource;
    
    /**
     * <p>
     * XmlTransformer is a class that extends AbstractTransformer to transform XML documents into a specific format for indexing.
     * It uses XPath expressions to extract data from the XML and stores it in a ResultData object.
     * </p>
     *
     * <p>
     * This class provides several configuration options to customize the XML parsing process, such as:
     * </p>
     * <ul>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

    import jakarta.annotation.Resource;
    
    /**
     * {@link HtmlXpathExtractor} is an implementation of the {@link org.codelibs.fess.crawler.extractor.Extractor} interface.
     * It uses XPath expressions to extract text content from HTML documents.
     * <p>
     * This class provides methods to configure the XPath expressions, parser features, and properties.
     * It also includes caching mechanism for XPathAPI instances to improve performance.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  6. src/test/java/org/codelibs/fess/suggest/util/SuggestUtilTest.java

            String[] fields = { "content" };
            List<String> keywords = SuggestUtil.getKeywords(query, fields);
            assertNotNull(keywords);
            // Should extract all unique terms
            assertTrue(keywords.size() > 0);
        }
    
        @Test
        public void testCreateBulkLineWithMinimalItem() {
            // Test with minimal SuggestItem
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Mon Sep 01 13:33:03 UTC 2025
    - 18.2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                httpClientPropertyMap.put(name, value);
            }
        }
    
        /**
         * Processes robots.txt for the given URL.
         * This method fetches and parses the robots.txt file to extract disallow/allow rules
         * and sitemap information.
         *
         * @param url The URL to process robots.txt for
         */
        protected void processRobotsTxt(final String url) {
            if (StringUtil.isBlank(url)) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from Microsoft PowerPoint documents.
     */
    public class MsPowerPointExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsPowerPointExtractor instance.
         */
        public MsPowerPointExtractor() {
            super();
        }
    
        /**
         * Extracts text from the PowerPoint input stream.
         * @param in The input stream.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts the filename from the parameters.
     */
    public class FilenameExtractor extends AbstractExtractor {
    
        /**
         * Constructs a new FilenameExtractor.
         */
        public FilenameExtractor() {
            // Default constructor
        }
    
        /**
         * Extracts the filename from the parameters.
         * @param in The input stream (not used).
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

    /**
     * A response processor implementation that handles sitemaps.
     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
     * from each sitemap, and creates a new {@link RequestData} object for each URL.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
Back to top