Search Options

Results per page
Sort
Preferred Languages
Advance

Results 1 - 9 of 9 for docx (0.01 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

            extensionMap.put("odt", "txt");
            extensionMap.put("ott", "txt");
            extensionMap.put("sxw", "txt");
            extensionMap.put("rtf", "txt");
            extensionMap.put("doc", "txt");
            extensionMap.put("docx", "txt");
            extensionMap.put("wpd", "txt");
            extensionMap.put("txt", "txt");
            extensionMap.put("html", "txt");
            // Spreadsheet Formats
            extensionMap.put("ods", "tsv");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            assertEquals("application/msword", extractData.getValues("Content-Type")[0]);
        }
    
        public void test_getTika_mswordx() {
            final InputStream in = ResourceUtil.getResourceAsStream("extractor/msoffice/test.docx");
            final ExtractData extractData = tikaExtractor.getText(in, null);
            final String content = extractData.getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  3. src/main/java/org/codelibs/fess/suggest/index/SuggestIndexer.java

                final List<Map<String, Object>> docs = new ArrayList<>(docPerReq);
                try (final DocumentReader documentReader = reader.get()) {
                    Map<String, Object> doc = documentReader.read();
                    while (doc != null) {
                        if (Thread.currentThread().isInterrupted()) {
                            break;
                        }
                        docs.add(doc);
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Thu Aug 07 02:41:28 UTC 2025
    - 34.8K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

            private final Node doc;
    
            public DefaultNamespaceContext(final Node doc) {
                this.doc = doc;
            }
    
            @Override
            public String getNamespaceURI(final String prefix) {
                return getNamespaceForPrefix(prefix, doc);
            }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                final PDDocument doc = document;
                final Set<Exception> exceptionSet = new HashSet<>();
                final Thread task = new Thread(() -> {
                    try {
                        stripper.writeText(doc, writer);
                        extractEmbeddedDocuments(doc, writer);
                        extractAnnotations(doc, writer);
                    } catch (final Exception e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

    import org.codelibs.fess.crawler.exception.RobotsTxtException;
    
    /**
     * Robots.txt Specifications:
     * <ul>
     * <li><a href=
     * "https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt"
     * >https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
     * </a></li>
     * </ul>
     *
     * @author bowez
     * @author shinsuke
     *
     */
    public class RobotsTxtHelper {
    
        /** Pattern for parsing user-agent records. */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java

    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Gets a text from .doc file.
     *
     * @author shinsuke
     *
     */
    public class MsWordExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsWordExtractor instance.
         */
        public MsWordExtractor() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

         * Returns the result data header.
         * @return The result data header.
         */
        protected String getResultDataHeader() {
            // TODO: Support other XML header types
            return "<?xml version=\"1.0\"?>\n<doc>\n";
        }
    
        /**
         * Returns the result data body for a single value.
         * @param name The name of the field.
         * @param value The value of the field.
         * @return The result data body.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  9. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/http/HcHttpClientTest.java

        // public ResponseData call() throws Exception {
        // String[] urls =
        // new String[] {
        // "http://.../",
        // "http://.../test.pdf",
        // "http://.../test.doc",
        // "http://.../test.xls",
        // "http://.../test.ppt",
        // "http://.../test.txt", };
        // for (String url : urls) {
        // ResponseData responseData = httpClient.doGet(url);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.7K bytes
    - Viewed (0)
Back to top