Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 21 - 30 of 154 for extracted (0.32 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java

     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.Map;
    
    import org.codelibs.core.lang.StringUtil;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts the filename from the parameters as the content.
     *
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 2.7K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from TAR archives.
     */
    public class TarExtractor extends AbstractExtractor {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/helper/PermissionHelper.java

            this.userPrefix = userPrefix;
        }
    
        /**
         * Extracts role type information from SMB (Server Message Block) response data.
         * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs.
         *
         * @param responseData the response data containing SMB metadata
         * @return a list of role type strings extracted from the SMB permissions
         */
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Nov 13 05:54:52 UTC 2025
    - 15.4K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/suggest/index/contents/ContentsParser.java

                ReadingConverter readingConverter, Normalizer normalizer);
    
        /**
         * Parses a document and extracts suggest items based on the provided fields and converters.
         *
         * @param document The document to parse, represented as a map of field names to values.
         * @param fields The fields to extract from the document.
         * @param tagFieldNames The names of the fields that contain tags.
    Registered: Sat Dec 20 13:04:59 UTC 2025
    - Last Modified: Sat Mar 15 06:51:20 UTC 2025
    - 4.1K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/entity/ExtractDataTest.java

        }
    
        public void test_contentGetterSetter() {
            // Test content getter/setter
            ExtractData data = new ExtractData();
    
            String content = "This is extracted content";
            data.setContent(content);
            assertEquals(content, data.getContent());
    
            String newContent = "New content";
            data.setContent(newContent);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 9.9K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractorTest.java

            jsonExtractor.setExtractMetadata(false);
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/json/test.json");
            final ExtractData extractData = jsonExtractor.getText(in, null);
            CloseableUtil.closeQuietly(in);
    
            // Verify no metadata extracted
            assertNull(extractData.getValues("title"));
            assertNull(extractData.getValues("author"));
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 4.7K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  8. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

            }
            resultData.setEncoding(charsetName);
        }
    
        /**
         * Normalizes the extracted data, particularly handling title normalization.
         *
         * @param responseData the response data from crawling
         * @param dataMap the data map containing extracted field values
         */
        protected void normalizeData(final ResponseData responseData, final Map<String, Object> dataMap) {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Dec 12 13:58:40 UTC 2025
    - 54.6K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                    logger.warn("Failed to close HTTP client for API extractor", e);
                }
            }
        }
    
        /**
         * Extracts text from the input stream using the API endpoint.
         *
         * @param in the input stream to extract text from
         * @param params additional parameters
         * @return the extracted data
         * @throws ExtractException if extraction fails
         */
        @Override
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

                throw new RobotsTxtException("Failed to parse robots.txt.", e);
            }
        }
    
        /**
         * Extracts the value from a line using the given pattern.
         * @param pattern the pattern to match against
         * @param line the line to extract the value from
         * @return the extracted value, or null if no match
         */
        protected String getValue(final Pattern pattern, final String line) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 12:52:01 UTC 2025
    - 11.4K bytes
    - Viewed (0)
Back to top