Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 10 of 64 for extracted (0.9 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    
    /**
     * PdfExtractor extracts text content from PDF files using Apache PDFBox.
     * It supports password-protected PDFs and can extract embedded documents and annotations.
     *
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 12.8K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

         * @param outExt the output file extension
         * @return the extracted text content
         * @throws ExtractException if an error occurs while reading the file
         */
        protected String getOutputContent(final File outputFile, final String outExt) {
            final Extractor extractor = getExtractor(outExt);
            if (extractor != null) {
                final Map<String, String> params = new HashMap<>();
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorBuilder.java

                dfos = out;
                CopyUtil.copy(in, out);
                out.flush();
    
                Extractor extractor = StringUtil.isBlank(mimeType) ? null : extractorFactory.getExtractor(mimeType);
                if (extractor == null) {
                    final String detectedMimeType = getMimeType(out);
                    extractor = extractorFactory.getExtractor(detectedMimeType);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java

        public void setMailProperties(final Properties mailProperties) {
            this.mailProperties = mailProperties;
        }
    
        /**
         * Extracts the body text from a MIME message.
         *
         * @param message the MIME message to extract text from
         * @return the extracted body text
         * @throws ExtractException if extraction fails
         */
        protected String getBodyText(final MimeMessage message) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.6K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/helper/PermissionHelper.java

            this.userPrefix = userPrefix;
        }
    
        /**
         * Extracts role type information from SMB (Server Message Block) response data.
         * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs.
         *
         * @param responseData the response data containing SMB metadata
         * @return a list of role type strings extracted from the SMB permissions
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 13 05:54:52 UTC 2025
    - 15.4K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java

        }
    
        public void test_contentGetterSetter() {
            // Test content getter/setter
            ExtractData data = new ExtractData();
    
            String content = "This is extracted content";
            data.setContent(content);
            assertEquals(content, data.getContent());
    
            String newContent = "New content";
            data.setContent(newContent);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                    logger.warn("Failed to close HTTP client for API extractor", e);
                }
            }
        }
    
        /**
         * Extracts text from the input stream using the API endpoint.
         *
         * @param in the input stream to extract text from
         * @param params additional parameters
         * @return the extracted data
         * @throws ExtractException if extraction fails
         */
        @Override
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                throw new RobotsTxtException("Failed to parse robots.txt.", e);
            }
        }
    
        /**
         * Extracts the value from a line using the given pattern.
         * @param pattern the pattern to match against
         * @param line the line to extract the value from
         * @return the extracted value, or null if no match
         */
        protected String getValue(final Pattern pattern, final String line) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java

    import org.codelibs.fess.crawler.Constants;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content and metadata from CSV files.
     * This extractor provides better structured data extraction compared to Tika's generic text extraction.
     *
     * <p>Features:
     * <ul>
     *   <li>Automatic delimiter detection (comma, tab, semicolon, pipe)</li>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 12.8K bytes
    - Viewed (0)
Back to top