- Sort Score
- Result 10 results
- Languages All
Results 81 - 90 of 103 for etext (0.01 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.util.regex.Pattern; /** * Extracts text content from XML documents. */ public class XmlExtractor extends AbstractXmlExtractor { /** * Creates a new XmlExtractor instance. */ public XmlExtractor() { super(); }
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/pom.xml
<artifactId>commons-lang3</artifactId> <version>${commons.lang3.version}</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-text</artifactId> <version>${commons.text.version}</version> </dependency> <dependency> <groupId>commons-net</groupId> <artifactId>commons-net</artifactId> <version>${commons.net.version}</version> </dependency>
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 11.3K bytes - Viewed (0) -
LICENSE
(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed
Registered: Fri Sep 19 09:08:11 UTC 2025 - Last Modified: Mon Jan 11 04:30:09 UTC 2021 - 11.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java
public static final String FILE_PASSWORDS = "file.passwords"; /** Map containing metadata key-value pairs */ protected Map<String, String[]> metadata = new HashMap<>(); /** The extracted content text */ protected String content; /** * Constructs a new ExtractData. */ public ExtractData() { // Default constructor } /**
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Sep 06 04:15:37 UTC 2025 - 3.8K bytes - Viewed (0) -
fess-crawler/src/test/resources/ajax/js/jquery-2.1.1.min.js
essData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":pc,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":n.parseJSON,"text xml":n.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(a,b){return b?tc(tc(a,n.ajaxSe...
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 82.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java
import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.codelibs.fess.crawler.util.IgnoreCloseInputStream; import jakarta.annotation.Resource; /** * Extracts text content from ZIP archives. */ public class ZipExtractor extends AbstractExtractor { private static final Logger logger = LogManager.getLogger(ZipExtractor.class); /** * The archive stream factory.
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.5K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java
TikaExtractor tikaExtractor = container.getComponent("tikaExtractor"); factory.addExtractor("text/plain", tikaExtractor); factory.addExtractor("text/html", tikaExtractor); })// ; tikaExtractor = container.getComponent("tikaExtractor"); } public void test_getTika_text() {
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 30.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/SitemapFile.java
public class SitemapFile implements Sitemap { private static final long serialVersionUID = 1L; /** * Identifies the location of the Sitemap. This location can be a Sitemap, * an Atom file, RSS file or a simple text file. */ private String loc; /** * Identifies the time that the corresponding Sitemap file was modified. It * does not correspond to the time that any of the pages listed in that
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 4.4K bytes - Viewed (1) -
README.md
### Content Formats #### Office Documents - Microsoft Office (Word, Excel, PowerPoint) - OpenOffice/LibreOffice documents - RTF, WordPerfect #### PDFs and Images - PDF documents (text and metadata extraction) - Images (JPEG, PNG, GIF, TIFF, BMP) - Image metadata (EXIF, IPTC, XMP) #### Archives and Compressed Files - ZIP, TAR, GZ archives - LHA compression format - Nested archive extraction
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformerTest.java
responseData.setUrl("http://hoge/test.html"); responseData.setResponseBody(data); responseData.setCharSet("ISO-8859-1"); responseData.setMimeType("text/html"); final ResultData resultData = htmlTransformer.transform(responseData); assertEquals(content, new String(resultData.getData())); assertEquals(1, resultData.getChildUrlSet().size());
Registered: Sun Sep 21 03:50:09 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 13.8K bytes - Viewed (0)