- Sort Score
- Result 10 results
- Languages All
Results 191 - 200 of 452 for crawler_ (0.07 sec)
-
fess-crawler-lasta/src/main/resources/crawler/urlconverter.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <!-- Converts child links to normalize it. --> <component name="urlConvertHelper" class="org.codelibs.fess.crawler.helper.UrlConvertHelper"> </component>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 394 bytes - Viewed (0) -
src/main/resources/crawler/extractor+tikaExtractor.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <component name="tikaExtractor" class="org.codelibs.fess.crawler.extractor.impl.TikaExtractor"> <property name="maxCompressionRatio">2</property> <property name="maxUncompressionSize">10000000</property> </component>
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Sun Aug 25 12:46:12 UTC 2019 - 461 bytes - Viewed (0) -
src/main/java/org/codelibs/fess/crawler/interval/FessIntervalController.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.interval; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.fess.crawler.interval.impl.DefaultIntervalController; import org.codelibs.fess.helper.IntervalControlHelper; import org.codelibs.fess.util.ComponentUtil; /**
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Wed Nov 19 07:09:17 UTC 2025 - 5.1K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/helper/ProtocolHelper.java
/** * Helper class for managing and validating URL protocols in Fess crawling system. * This class handles the initialization and validation of web and file protocols * used by the crawler to determine which URLs can be crawled. */ public class ProtocolHelper { private static final Logger logger = LogManager.getLogger(ProtocolHelper.class);
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Fri Dec 12 13:58:40 UTC 2025 - 12.4K bytes - Viewed (1) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor; import java.io.InputStream; import java.util.Map; import org.codelibs.fess.crawler.entity.ExtractData; /** * The Extractor interface defines methods for extracting text data from an input stream.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.6K bytes - Viewed (0) -
src/main/resources/crawler/contentlength.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <component name="contentLengthHelper" class="org.codelibs.fess.crawler.helper.ContentLengthHelper" instance="singleton"> <property name="defaultMaxLength">10485760</property><!-- 10M --> <postConstruct name="addMaxLength"> <arg>"text/html"</arg>
Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Sun Oct 11 06:51:14 UTC 2015 - 561 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.Map; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; /**
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Wed Nov 19 08:55:01 UTC 2025 - 2.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb1/SmbClientTest.java
import org.codelibs.fess.crawler.exception.ChildUrlsException; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.CrawlingAccessException; import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.helper.ContentLengthHelper; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Tue Sep 23 06:42:59 UTC 2025 - 30K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ChildUrlsException.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; import java.util.Set; import org.codelibs.fess.crawler.entity.RequestData; /** * {@link ChildUrlsException} is thrown when child URLs are found during crawling.
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/exception/ExecutionTimeoutException.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.exception; /** * An exception indicating that the execution of a process has timed out. * This exception extends {@link org.codelibs.fess.crawler.exception.ExtractException}. * */ public class ExecutionTimeoutException extends ExtractException {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 1.8K bytes - Viewed (0)