Search Options

Results per page
Sort
Preferred Languages
Advance

Results 61 - 70 of 154 for extracted (0.5 sec)

  1. src/main/java/org/codelibs/fess/crawler/transformer/FessFileTransformer.java

                throw new FessSystemException("Could not find extractorFactory.");
            }
            final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
            if (logger.isDebugEnabled()) {
                logger.debug("url={}, extractor={}", responseData.getUrl(), extractor);
            }
            return extractor;
        }
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.5K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    
    import org.codelibs.core.io.ResourceUtil;
    import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractorTest.java

            csvExtractor.setMaxRows(2);
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/csv/test.csv");
            final ExtractData extractData = csvExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            final String rowCount = extractData.getValues("row_count")[0];
            // Should extract 2 data rows (header doesn't count toward maxRows)
            assertEquals("2", rowCount);
        }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 5.3K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.util.regex.Pattern;
    
    /**
     * Extracts text content from XML documents.
     */
    public class XmlExtractor extends AbstractXmlExtractor {
    
        /**
         * Creates a new XmlExtractor instance.
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  6. CLAUDE.md

    ```
    
    4. **Add tests**: Unit + integration
    
    ### Adding a Content Extractor
    
    1. **Implement `Extractor`**:
    ```java
    public class MyExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(InputStream in, Map<String, String> params) {
            ExtractData data = new ExtractData();
            // Extract text
            data.setContent(extractedText);
            return data;
        }
    }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

        /**
         * Constructs a new CommandExtractor.
         */
        public CommandExtractor() {
            // NOP
        }
    
        /*
         * (non-Javadoc)
         *
         * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream,
         * java.util.Map)
         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 16.1K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/helper/RelatedQueryHelper.java

            this.relatedQueryMap = relatedQueryMap;
            return relatedQueryMap.size();
        }
    
        /**
         * Extracts the virtual host key from a RelatedQuery entity.
         * If the virtual host is blank or null, returns an empty string.
         *
         * @param entity the RelatedQuery entity to extract the host key from
         * @return the virtual host key, or empty string if blank or null
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java

        /** The key of the message: Crawling/Displaying */
        public static final String LABELS_pathmap_pt_both = "{labels.pathmap_pt_both}";
    
        /** The key of the message: Extracted URL Conversion */
        public static final String LABELS_pathmap_pt_stored = "{labels.pathmap_pt_stored}";
    
        /** The key of the message: Regular Name */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 156.4K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

            assertContentType("application/pdf", "extractor/test.pdf", "hoge.pdf");
    
            assertContentType("application/gzip", "extractor/gz/test.tar.gz", "hoge.tar.gz");
            assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip");
            assertContentType("application/x-lharc", "extractor/lha/test.lzh", "hoge.lzh"); // TODO is it correct?
    
            assertContentType("application/xml", "extractor/test.mm", "hoge.mm");
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 11.6K bytes
    - Viewed (0)
Back to top