Search Options

Results per page
Sort
Preferred Languages
Advance

Results 21 - 30 of 64 for ditamap (0.93 sec)

  1. fess-crawler/src/test/resources/sitemaps/sitemap2.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
       <sitemap>
          <loc>http://www.example.com/sitemap1.xml.gz</loc>
          <lastmod>2004-10-01T18:23:17+00:00</lastmod>
       </sitemap>
       <sitemap>
          <loc>http://www.example.com/sitemap2.xml.gz</loc>
          <lastmod>2005-01-01</lastmod>
       </sitemap>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 376 bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java

    package org.codelibs.fess.crawler.entity;
    
    import org.codelibs.core.lang.StringUtil;
    
    /**
     * Represents a Sitemap file entry, conforming to the Sitemap XML format.
     * This class holds information about a single Sitemap, including its location and last modification timestamp.
     * It implements the {@link Sitemap} interface.
     *
     * <p>
     * A Sitemap file provides search engines with a list of URLs available for crawling.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.4K bytes
    - Viewed (1)
  3. src/main/java/org/codelibs/fess/app/web/admin/general/AdminGeneralAction.java

            validate(form, messages -> {}, () -> asHtml(path_AdminGeneral_AdminGeneralJsp));
    
            final String[] toAddresses = form.notificationTo.split(",");
            final Map<String, Object> dataMap = new HashMap<>();
            dataMap.put("hostname", systemHelper.getHostname());
    
            final FessConfig fessConfig = ComponentUtil.getFessConfig();
            final Postbox postbox = ComponentUtil.getComponent(Postbox.class);
            try {
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Sat Dec 13 02:21:17 UTC 2025
    - 14.4K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/RobotsTxtHelperTest.java

            String[] sitemaps = robotsTxt.getSitemaps();
            assertEquals(2, sitemaps.length);
            assertEquals("http://www.example.com/sitmap.xml", sitemaps[0]);
            assertEquals("http://www.example.net/sitmap.xml", sitemaps[1]);
    
        }
    
        public void testParse_disable() {
            final InputStream in = RobotsTxtHelperTest.class.getResourceAsStream("robots.txt");
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 20.6K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/SitemapsHelper.java

     */
    public class SitemapsHelper {
        private static final Logger logger = LogManager.getLogger(SitemapsHelper.class);
    
        /** The size of the preload buffer for checking file format. */
        protected int preloadSize = 512;
    
        /** Enable validation of sitemap entries. */
        protected boolean enableValidation = false;
    
        /** Maximum URL length according to sitemap specification. */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Fri Nov 14 13:19:40 UTC 2025
    - 34.9K bytes
    - Viewed (0)
  6. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <sub-class-of type="application/dita+xml"/>
        <_comment>DITA Map</_comment>
        <root-XML localName="map"/>
        <root-XML localName="map" namespaceURI="http://docs.oasis-open.org/namespace"/>
        <glob pattern="*.ditamap"/>
      </mime-type>
      <mime-type type="application/dita+xml;format=topic">
        <sub-class-of type="application/dita+xml"/>
        <_comment>DITA Topic</_comment>
        <root-XML localName="topic"/>
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Oct 16 07:46:32 UTC 2025
    - 320.2K bytes
    - Viewed (5)
  7. fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots_wildcard.txt

    Allow: /page
    
    # Test multiple wildcards
    User-agent: MultiWildcardBot
    Disallow: /*.cgi*
    Disallow: /*?*id=*
    
    # Test literal $ in middle of pattern
    User-agent: DollarBot
    Disallow: /price$info
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Nov 13 14:03:41 UTC 2025
    - 910 bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java

     * represents a valid sitemap. It uses a SitemapsHelper to validate the response body as an InputStream.
     * The rule checks if the URL matches the defined regex pattern and then validates the content as a sitemap.
     * If any exception occurs during the sitemap validation, it logs the error and returns false.
     *
     */
    public class SitemapsRule extends RegexRule {
        /**
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

            String[] sitemaps = new String[] { "http://example.com/sitemap.xml", "http://test.com/sitemap.xml" };
            crawlerContext.addSitemaps(sitemaps);
    
            // Remove and verify
            String[] removedSitemaps = crawlerContext.removeSitemaps();
            assertNotNull(removedSitemaps);
            assertEquals(2, removedSitemaps.length);
            assertEquals("http://example.com/sitemap.xml", removedSitemaps[0]);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  10. src/main/resources/crawler/rule.xml

    			<component class="org.codelibs.fess.crawler.processor.impl.SitemapsResponseProcessor">
    			</component>
    		</property>
    		<postConstruct name="addRule">
    			<arg>"url"</arg>
    			<arg>"http[s]?:.*sitemap[^/]*\.xml.*|http[s]?:.*sitemap[^/]*\.gz.*|http[s]?:.*sitemap[^/]*\.txt.*"</arg>
    		</postConstruct>
    	</component>
    
    	<component name="webHtmlRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
    		<property name="ruleId">"webHtmlRule"</property>
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Thu Jun 04 08:42:49 UTC 2020
    - 4.6K bytes
    - Viewed (0)
Back to top