Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 61 - 70 of 70 for resources (0.04 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/DefaultResponseProcessor.java

    import org.codelibs.fess.crawler.service.UrlQueueService;
    import org.codelibs.fess.crawler.transformer.Transformer;
    import org.codelibs.fess.crawler.util.CrawlingParameterUtil;
    
    import jakarta.annotation.Resource;
    
    /**
     * <p>
     * {@link DefaultResponseProcessor} is a default implementation of {@link ResponseProcessor}.
     * It processes the response data based on the HTTP status code and configured transformer.
     * </p>
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.5K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

        protected String getEncoding(final BufferedInputStream bis) {
            final byte[] b = new byte[preloadSizeForCharset];
            try {
                bis.mark(preloadSizeForCharset);
                @SuppressWarnings("resource")
                final BOMInputStream bomIn = new BOMInputStream(bis, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
                        ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE, BOM_UTF_7);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.5K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

    import org.w3c.dom.NamedNodeMap;
    import org.w3c.dom.Node;
    
    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;
    
    import jakarta.annotation.Resource;
    
    /**
     * <p>
     * XmlTransformer is a class that extends AbstractTransformer to transform XML documents into a specific format for indexing.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

    import org.codelibs.fess.crawler.util.XPathAPI;
    import org.codelibs.nekohtml.parsers.DOMParser;
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;
    import org.xml.sax.InputSource;
    
    import jakarta.annotation.Resource;
    
    /**
     * The {@code HtmlTransformer} class is responsible for transforming HTML responses
     * during the crawling process. It extracts data, identifies child URLs, and handles
     * character set encoding.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  5. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    				"application/prs.plucker",
    				"application/qsig",
    				"application/reginfo+xml",
    				"application/relax-ng-compact-syntax",
    				"application/remote-printing",
    				"application/resource-lists+xml",
    				"application/resource-lists-diff+xml",
    				"application/riscos",
    				"application/rlmi+xml",
    				"application/rls-services+xml",
    				"application/rsd+xml",
    				"application/rss+xml",
    				"application/rtf",
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapUrl.java

         * desired, and use YYYY-MM-DD.
         *
         * Note that this tag is separate from the If-Modified-Since (304) header
         * the server can return, and search engines may use the information from
         * both sources differently.
         */
        private String lastmod;
    
        /**
         * How frequently the page is likely to change. This value provides general
         * information to search engines and may not correlate exactly to how often
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  7. fess-crawler-opensearch/src/main/java/org/codelibs/fess/crawler/service/impl/AbstractCrawlerService.java

    import org.opensearch.search.SearchHits;
    import org.opensearch.search.sort.SortBuilder;
    
    import com.google.common.hash.HashFunction;
    import com.google.common.hash.Hashing;
    
    import jakarta.annotation.Resource;
    
    /**
     * Abstract base class for crawler services that interact with OpenSearch.
     *
     * @author shinsuke
     *
     */
    public abstract class AbstractCrawlerService {
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 34.2K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/impl/AbstractRuleTest.java

            // Verify
            assertEquals("serializeRule", deserializedRule.getRuleId());
            assertNotNull(deserializedRule.getResponseProcessor());
    
            // Note: crawlerContainer is transient (marked with @Resource)
            // so it won't be serialized
            assertNull(deserializedRule.crawlerContainer);
        }
    
        /**
         * Test with null CrawlerContainer
         */
        public void test_nullCrawlerContainer() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 21.9K bytes
    - Viewed (0)
  9. src/main/java/org/codelibs/fess/suggest/settings/SuggestSettings.java

            tuples.add(new Tuple<>(DefaultKeys.SUPPORTED_FIELDS, "content"));
            return tuples;
        }
    
        /**
         * Loads the index settings from a resource file.
         * @return The index settings as a string.
         * @throws IOException If an I/O error occurs.
         */
        protected String loadIndexSettings() throws IOException {
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Thu Aug 07 02:41:28 UTC 2025
    - 18.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <sub-class-of type="text/plain"/>
        <glob pattern="*.rnc"/>
      </mime-type>
      <mime-type type="application/remote-printing"/>
      <mime-type type="application/resource-lists+xml">
        <glob pattern="*.rl"/>
      </mime-type>
      <mime-type type="application/resource-lists-diff+xml">
        <glob pattern="*.rld"/>
      </mime-type>
      <mime-type type="application/riscos"/>
      <mime-type type="application/rlmi+xml"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
Back to top