Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 81 - 90 of 103 for etext (0.01 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.util.regex.Pattern;
    
    /**
     * Extracts text content from XML documents.
     */
    public class XmlExtractor extends AbstractXmlExtractor {
    
        /**
         * Creates a new XmlExtractor instance.
         */
        public XmlExtractor() {
            super();
        }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  2. fess-crawler/pom.xml

    			<artifactId>commons-lang3</artifactId>
    			<version>${commons.lang3.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.commons</groupId>
    			<artifactId>commons-text</artifactId>
    			<version>${commons.text.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>commons-net</groupId>
    			<artifactId>commons-net</artifactId>
    			<version>${commons.net.version}</version>
    		</dependency>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.3K bytes
    - Viewed (0)
  3. LICENSE

          (d) If the Work includes a "NOTICE" text file as part of its
              distribution, then any Derivative Works that You distribute must
              include a readable copy of the attribution notices contained
              within such NOTICE file, excluding those notices that do not
              pertain to any part of the Derivative Works, in at least one
              of the following places: within a NOTICE text file distributed
    Registered: Fri Sep 19 09:08:11 UTC 2025
    - Last Modified: Mon Jan 11 04:30:09 UTC 2021
    - 11.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

        public static final String FILE_PASSWORDS = "file.passwords";
    
        /** Map containing metadata key-value pairs */
        protected Map<String, String[]> metadata = new HashMap<>();
    
        /** The extracted content text */
        protected String content;
    
        /**
         * Constructs a new ExtractData.
         */
        public ExtractData() {
            // Default constructor
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  5. fess-crawler/src/test/resources/ajax/js/jquery-2.1.1.min.js

    essData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":pc,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":n.parseJSON,"text xml":n.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(a,b){return b?tc(tc(a,n.ajaxSe...
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Oct 11 02:16:55 UTC 2015
    - 82.3K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

    import org.codelibs.fess.crawler.helper.MimeTypeHelper;
    import org.codelibs.fess.crawler.util.IgnoreCloseInputStream;
    
    import jakarta.annotation.Resource;
    
    /**
     * Extracts text content from ZIP archives.
     */
    public class ZipExtractor extends AbstractExtractor {
        private static final Logger logger = LogManager.getLogger(ZipExtractor.class);
    
        /**
         * The archive stream factory.
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

                        TikaExtractor tikaExtractor = container.getComponent("tikaExtractor");
                        factory.addExtractor("text/plain", tikaExtractor);
                        factory.addExtractor("text/html", tikaExtractor);
                    })//
            ;
    
            tikaExtractor = container.getComponent("tikaExtractor");
        }
    
        public void test_getTika_text() {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java

    public class SitemapFile implements Sitemap {
    
        private static final long serialVersionUID = 1L;
    
        /**
         * Identifies the location of the Sitemap. This location can be a Sitemap,
         * an Atom file, RSS file or a simple text file.
         */
        private String loc;
    
        /**
         * Identifies the time that the corresponding Sitemap file was modified. It
         * does not correspond to the time that any of the pages listed in that
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.4K bytes
    - Viewed (1)
  9. README.md

    ### Content Formats
    
    #### Office Documents
    - Microsoft Office (Word, Excel, PowerPoint)
    - OpenOffice/LibreOffice documents
    - RTF, WordPerfect
    
    #### PDFs and Images
    - PDF documents (text and metadata extraction)
    - Images (JPEG, PNG, GIF, TIFF, BMP)
    - Image metadata (EXIF, IPTC, XMP)
    
    #### Archives and Compressed Files
    - ZIP, TAR, GZ archives
    - LHA compression format
    - Nested archive extraction
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformerTest.java

            responseData.setUrl("http://hoge/test.html");
            responseData.setResponseBody(data);
            responseData.setCharSet("ISO-8859-1");
            responseData.setMimeType("text/html");
            final ResultData resultData = htmlTransformer.transform(responseData);
            assertEquals(content, new String(resultData.getData()));
            assertEquals(1, resultData.getChildUrlSet().size());
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 13.8K bytes
    - Viewed (0)
Back to top