Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 31 - 40 of 363 for extractor (0.04 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractorTest.java

            final String content = htmlXpathExtractor.getText(in, null).getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        public void test_getHtml_sjis() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.html");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 4.2K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 9.5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java

                    logger.warn("Failed to close HTTP client for API extractor", e);
                }
            }
        }
    
        /**
         * Extracts text from the input stream using the API endpoint.
         *
         * @param in the input stream to extract text from
         * @param params additional parameters
         * @return the extracted data
         * @throws ExtractException if extraction fails
         */
        @Override
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 12.2K bytes
    - Viewed (0)
  4. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractorTest.java

            final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.xml");
            final String content = xmlExtractor.getText(in, null).getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
        }
    
        public void test_getXml_entity() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_entity.xml");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.4K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

            } finally {
                xpathAPI.remove();
            }
        }
    
        /**
         * Extracts strings from a document using the specified XPath expression.
         *
         * @param document the DOM document to extract strings from
         * @param path the XPath expression to evaluate
         * @return an array of strings extracted from the document
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractor.java

     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.File;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.List;
    
    import org.codelibs.fess.crawler.container.CrawlerContainer;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.extractor.Extractor;
    import org.codelibs.fess.crawler.extractor.ExtractorFactory;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 4.6K bytes
    - Viewed (0)
  8. build-logic/documentation/src/main/groovy/gradlebuild/docs/dsl/source/ClassMetaDataUtil.java

    import java.io.File;
    import java.util.HashSet;
    import java.util.Set;
    
    class ClassMetaDataUtil {
        static void extractFromMetadata(File metaData, Set<String> excludedPackagePatterns, Action<ClassMetaData> extractor) {
            SimpleClassMetaDataRepository<ClassMetaData> repository = new SimpleClassMetaDataRepository<>();
            repository.load(metaData);
    
            final Set<String> excludedPrefixes = new HashSet<>();
    Registered: Wed Dec 31 11:36:14 UTC 2025
    - Last Modified: Wed Dec 09 08:14:05 UTC 2020
    - 2.4K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java

            final Extractor extractor = new Extractor() {
                public ExtractData getText(final InputStream in, final Map<String, String> params) {
                    return null;
                }
            };
    
            assertNull(extractorFactory.getExtractor("test"));
            extractorFactory.addExtractor("test", extractor);
            assertEquals(extractor, extractorFactory.getExtractor("test"));
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  10. CLAUDE.md

    ```
    
    4. **Add tests**: Unit + integration
    
    ### Adding a Content Extractor
    
    1. **Implement `Extractor`**:
    ```java
    public class MyExtractor extends AbstractExtractor {
        @Override
        public ExtractData getText(InputStream in, Map<String, String> params) {
            ExtractData data = new ExtractData();
            // Extract text
            data.setContent(extractedText);
            return data;
        }
    }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 28 17:31:34 UTC 2025
    - 10.7K bytes
    - Viewed (0)
Back to top