Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 21 - 30 of 159 for comment (0.02 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

     * and cache file size to optimize the extraction process.
     *
     * <p>
     * The main purpose of this class is to simplify the extraction process by providing a fluent interface
     * for configuring the extraction parameters and handling the underlying complexities of content processing,
     * such as MIME type detection, extractor selection, and content length validation.
     * </p>
     *
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                return m.group(1);
            }
            return null;
        }
    
        /**
         * Strips comments from a line (everything after '#' character).
         * @param line the line to strip comments from
         * @return the line without comments
         */
        protected String stripComment(final String line) {
            final int commentIndex = line.indexOf('#');
            if (commentIndex != -1) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/BinaryTransformer.java

     */
    package org.codelibs.fess.crawler.transformer.impl;
    
    import java.io.BufferedInputStream;
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    
    import org.apache.commons.io.IOUtils;
    import org.codelibs.fess.crawler.entity.AccessResultData;
    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.ResultData;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/AbstractCrawlerClient.java

        public static final String ACCESS_TIMEOUT_PROPERTY = "accessTimeout";
    
        /** The property name for maximum content length. */
        public static final String MAX_CONTENT_LENGTH = "maxContentLength";
    
        /** The property name for maximum cached content size. */
        public static final String MAX_CACHED_CONTENT_SIZE = "maxCachedContentSize";
    
        /** The crawler container. */
        @Resource
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 9.7K bytes
    - Viewed (10)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

    import org.apache.pdfbox.pdmodel.PDPage;
    import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
    import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
    import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
    import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification;
    import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

        }
    
        /**
         * Gets the extracted content.
         *
         * @return the extracted content
         */
        public String getContent() {
            return content;
        }
    
        /**
         * Sets the extracted content.
         *
         * @param content the content to set
         */
        public void setContent(final String content) {
            this.content = content;
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            Map<String, String> params = new HashMap<>();
            params.put("Content-Type", "text/plain");
            final ExtractData extractData = tikaExtractor.getText(in, params);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        // TODO tika needs to support pdfbox 2.0
        //    public void test_getTika_pdf() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/suggest/converter/KatakanaConverter.java

     * check if a tokenizer is enabled. It also includes methods to create a token
     * stream and extract reading information from the stream's attributes, although
     * the tokenizer-related functionality is currently commented out.
     * </p>
     */
    public class KatakanaConverter implements ReadingConverter {
    
        /** The transliterator for Hiragana-Katakana conversion. */
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Fri Jul 04 14:00:23 UTC 2025
    - 6.1K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractorTest.java

            assertTrue(content.contains("画像1"));
            assertTrue(content.contains("画像2"));
            assertTrue(content.contains("タイトル1"));
            assertTrue(content.contains("タイトル2"));
            assertTrue(content.contains("リンク1"));
        }
    
        public void test_getHtml_empty() {
            final InputStream in = new ByteArrayInputStream("".getBytes());
            final String content = htmlXpathExtractor.getText(in, null).getContent();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.2K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

            params.put("resourceName", "test.odt");
            ExtractData extractData = jodExtractor.getText(in, params);
            String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        public void test_getText_ooow_as() {
            InputStream in = ResourceUtil.getResourceAsStream("extractor/ooo/test_as.odt");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.5K bytes
    - Viewed (0)
Back to top