Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 6 of 6 for Specification (0.17 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                }
            }
            return SSLConnectionSocketFactory.getSocketFactory();
        }
    
        /**
         * Builds the cookie specification registry.
         *
         * @return The configured cookie specification registry
         */
        protected Lookup<CookieSpecProvider> buildCookieSpecRegistry() {
            if (cookieSpecRegistry != null) {
                return cookieSpecRegistry;
            }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                    extractFile(ent.getKey(), embeddedFile, writer);
                }
            }
        }
    
        /**
         * Gets the embedded file from a file specification, trying different platform-specific variants.
         * @param fileSpec the file specification
         * @return the embedded file, or null if not found
         */
        protected PDEmbeddedFile getEmbeddedFile(final PDComplexFileSpecification fileSpec) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/XmlUtil.java

            );
        }
    
        /**
         * Strips invalid XML characters from the input string.
         *
         * This method removes characters that are not allowed in XML documents
         * according to the XML 1.0 specification. Valid characters include:
         * - Tab (0x9)
         * - Line feed (0xA)
         * - Carriage return (0xD)
         * - Any character between 0x20 and 0xD7FF
         * - Any character between 0xE000 and 0xFFFD
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/RobotsTxt.java

     * This class manages the rules defined in a robots.txt file, including user agent directives,
     * allowed/disallowed paths, crawl delays, and sitemap URLs.
     *
     * <p>The robots.txt protocol is implemented according to the standard specification,
     * supporting pattern matching for user agents, path-based access control, and crawl delay settings.</p>
     *
     * <p>Key features:</p>
     * <ul>
     *   <li>Supports multiple user-agent directives with pattern matching</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10K bytes
    - Viewed (0)
  5. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

      <mime-type type="application/vnd.ms-wpl">
        <glob pattern="*.wpl"/>
      </mime-type>
      <mime-type type="application/vnd.ms-xpsdocument">
        <alias type="application/oxps"/>
        <_comment>Open XML Paper Specification</_comment>
        <glob pattern="*.xps"/>
        <glob pattern="*.oxps"/>
        <sub-class-of type="application/x-tika-ooxml"/>
      </mime-type>
    
      <mime-type type="application/vnd.msa-disk-image">
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

    import org.codelibs.fess.crawler.entity.RobotsTxt;
    import org.codelibs.fess.crawler.entity.RobotsTxt.Directive;
    import org.codelibs.fess.crawler.exception.RobotsTxtException;
    
    /**
     * Robots.txt Specifications:
     * <ul>
     * <li><a href=
     * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt"
     * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
     * </a></li>
     * </ul>
     *
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
Back to top