Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 75 for comment (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

    import java.util.regex.Pattern;
    
    import org.apache.commons.io.ByteOrderMark;
    import org.apache.commons.io.input.BOMInputStream;
    import org.apache.commons.text.translate.AggregateTranslator;
    import org.apache.commons.text.translate.CharSequenceTranslator;
    import org.apache.commons.text.translate.EntityArrays;
    import org.apache.commons.text.translate.LookupTranslator;
    import org.apache.commons.text.translate.NumericEntityUnescaper;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.5K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

     *   <li>Retrying extraction without resource name or content type if the initial attempt fails</li>
     *   <li>Extracting text from metadata if the main content extraction fails</li>
     *   <li>Reading content as plain text if all other methods fail</li>
     *   <li>Applying post-extraction filters</li>
     *   <li>Handling Tika exceptions, including zip bomb exceptions</li>
     * </ul>
     *
     * <p>
     * The class also supports configuration options such as:
     * </p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
  3. README.md

    ### Key Features
    
    - **Multi-Protocol Support**: HTTP/HTTPS, File System, FTP, SMB/CIFS, Cloud Storage (MinIO, S3)
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  4. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractorTest.java

            final InputStream in = ResourceUtil.getResourceAsStream("extractor/zip/test.zip");
            final String content = zipExtractor.getText(in, null).getContent();
            CloseableUtil.closeQuietly(in);
            logger.info(content);
            assertTrue(content.contains("テスト"));
            assertTrue(content.contains("テキスト"));
        }
    
        public void test_getText_maxSize() throws IOException {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 3.7K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XmlTransformer.java

    import org.codelibs.fess.crawler.util.XmlUtil;
    import org.w3c.dom.Document;
    import org.w3c.dom.NamedNodeMap;
    import org.w3c.dom.Node;
    
    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;
    
    import jakarta.annotation.Resource;
    
    /**
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 23.9K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

            this.tempDir = tempDir;
        }
    
        /**
         * Sets the command to execute for text extraction.
         * @param command The command to set.
         */
        public void setCommand(final String command) {
            this.command = command;
        }
    
        /**
         * Sets the timeout for command execution.
         * @param executionTimeout The execution timeout in milliseconds.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 16K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

    import java.io.BufferedInputStream;
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.commons.compress.archivers.ArchiveInputStream;
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;
    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.commons.compress.archivers.ArchiveInputStream;
    import org.apache.commons.compress.archivers.ArchiveStreamFactory;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 5K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java

         * The pattern captures the charset value specified in the content attribute of the meta tag.
         * Example: &lt;meta http-equiv="Content-Type" content="text/html; charset=UTF-8"&gt;
         */
        protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
                Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 10.3K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/ftp/FtpClient.java

    import org.apache.commons.lang3.StringUtils;
    import org.apache.commons.net.ftp.FTP;
    import org.apache.commons.net.ftp.FTPClient;
    import org.apache.commons.net.ftp.FTPClient.NatServerResolverImpl;
    import org.apache.commons.net.ftp.FTPClientConfig;
    import org.apache.commons.net.ftp.FTPFile;
    import org.apache.commons.net.ftp.FTPFileFilters;
    import org.apache.commons.net.ftp.FTPSClient;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 39.5K bytes
    - Viewed (0)
Back to top