Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 247 for parser (0.21 sec)

  1. src/main/resources/tika.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <properties>
      <parsers>
        <parser class="org.apache.tika.parser.DefaultParser">
          <parser-exclude class="org.apache.tika.parser.ocr.TesseractOCRParser"/>
        </parser>
      </parsers>
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Mon Feb 24 12:59:41 UTC 2020
    - 241 bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java

        /** Whether to extract link URLs as metadata. */
        protected boolean extractLinks = false;
    
        /** Markdown parser with extensions. */
        protected Parser parser;
    
        /** Text content renderer. */
        protected TextContentRenderer textRenderer;
    
        /**
         * Constructs a new MarkdownExtractor and initializes the parser.
         */
        public MarkdownExtractor() {
            super();
            initializeParser();
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 8.2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

            final DOMParser parser = getDomParser();
            try (final Reader reader = new StringReader(content)) {
                parser.parse(new InputSource(reader));
            } catch (final Exception e) {
                logger.warn("Failed to parse the content.", e);
                return new ExtractData(extractString(content));
            }
    
            final Document document = parser.getDocument();
            try {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

            }
        }
    
        /**
         * Creates and configures a DOM parser with the specified features and properties.
         *
         * @return a configured DOM parser instance
         * @throws CrawlerSystemException if the parser configuration is invalid
         */
        protected DOMParser getDomParser() {
            try {
                final DOMParser parser = new DOMParser();
                // feature
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Oct 04 08:47:19 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/core/xml/SAXParserUtil.java

         */
        public static void parse(final SAXParser parser, final InputSource inputSource, final DefaultHandler handler) {
            assertArgumentNotNull("parser", parser);
            assertArgumentNotNull("inputSource", inputSource);
            assertArgumentNotNull("handler", handler);
    
            try {
                parser.parse(inputSource, handler);
            } catch (final SAXException e) {
    Registered: Sat Dec 20 08:55:33 UTC 2025
    - Last Modified: Thu Jul 31 08:16:49 UTC 2025
    - 3K bytes
    - Viewed (0)
  6. src/main/java/org/codelibs/fess/query/parser/QueryParser.java

         * @return the parsed Query object
         * @throws QueryParseException if the query cannot be parsed
         */
        public Query parse(final String query) {
            return filterChain.parse(query);
        }
    
        /**
         * Creates a new Lucene query parser with the current configuration.
         * The parser is configured with the default field, analyzer, wildcard settings,
         * and default operator.
         *
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 10.2K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/SitemapsHelper.java

         * @param parser the SAX parser to configure
         * @throws SAXNotRecognizedException if the parser doesn't recognize the feature
         * @throws SAXNotSupportedException if the parser doesn't support the feature
         */
        protected void disableExternalResources(final SAXParser parser) throws SAXNotRecognizedException, SAXNotSupportedException {
            try {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 13:19:40 UTC 2025
    - 34.9K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

        @Resource
        protected CrawlerContainer crawlerContainer;
    
        /** Map of parser features to configure the DOM parser. */
        protected Map<String, String> featureMap = new HashMap<>();
    
        /** Map of parser properties to configure the DOM parser. */
        protected Map<String, String> propertyMap = new HashMap<>();
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Nov 29 07:42:33 UTC 2025
    - 30.5K bytes
    - Viewed (0)
  9. fess-crawler/pom.xml

    			<artifactId>tika-parser-apple-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-audiovideo-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-cad-module</artifactId>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Dec 20 06:34:36 UTC 2025
    - 12.1K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

        }
    
        /**
         * Creates a parse context.
         *
         * @param parser A parser.
         * @param params A map of parameters.
         * @return a parse context.
         */
        protected ParseContext createParseContext(final Parser parser, final Map<String, String> params) {
            final ParseContext parseContext = new ParseContext();
            parseContext.set(Parser.class, parser);
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 30.8K bytes
    - Viewed (0)
Back to top