- Sort Score
- Result 10 results
- Languages All
Results 81 - 90 of 538 for crawlers (0.12 sec)
-
fess-crawler/src/test/resources/org/codelibs/fess/crawler/helper/robots.txt
User-agent: * Disallow: /private/ Disallow: /help # disallows /help.html, /help/index.html, etc. Allow: /help/faq.html Crawl-delay: 3 User-agent: Crawler Disallow: /aaa User-agent: Crawler/1.0 Disallow: /bbb User-agent: Crawler/2.0 Disallow: /ccc User-agent: Hoge Crawler Disallow: /ddd sitemap: http://www.example.com/sitmap.xml
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sun Oct 11 02:16:55 UTC 2015 - 566 bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/service/DataService.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.service; import java.util.List; import org.codelibs.fess.crawler.entity.AccessResult; import org.codelibs.fess.crawler.util.AccessResultCallback; /** * @author shinsuke * */ public interface DataService<RESULT extends AccessResult<?>> {
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 1.3K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/transformer_basic.xml
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <component name="binaryTransformer" class="org.codelibs.fess.crawler.transformer.impl.BinaryTransformer" instance="singleton"> <property name="name">"binaryTransformer"</property> </component>
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sun Sep 30 21:21:24 UTC 2018 - 3.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/interval/impl/AbstractIntervalController.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.interval.impl; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.interval.IntervalController; /** * @author shinsuke * */ public abstract class AbstractIntervalController implements IntervalController {
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 2.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java
*/ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; /** * Gets a text from . file.
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 1.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java
*/ package org.codelibs.fess.crawler; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import org.codelibs.core.collection.LruHashSet; import org.codelibs.fess.crawler.filter.UrlFilter; import org.codelibs.fess.crawler.interval.IntervalController; import org.codelibs.fess.crawler.rule.RuleManager; /** * @author shinsuke * */
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sat Oct 12 01:40:57 UTC 2024 - 4.5K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler/client.xml
"http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <include path="crawler/robotstxt.xml" /> <include path="crawler/contentlength.xml" /> <include path="crawler/mimetype.xml" /> <component name="internalHttpClient" class="org.codelibs.fess.crawler.client.http.HcHttpClient" instance="prototype"> <property name="connectionTimeout">15000</property>
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Tue Aug 08 12:54:47 UTC 2023 - 2.8K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java
import org.codelibs.fess.crawler.exception.MaxLengthExceededException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl; import org.dbflute.utflute.core.PlainTestCase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author shinsuke * */ public class TarExtractorTest extends PlainTestCase {
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 3.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java
import org.codelibs.core.lang.ThreadUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.Extractor; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.MimeTypeHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory;
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Thu Feb 22 01:36:27 UTC 2024 - 9.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/client/ftp/FtpClient.java
import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.builder.RequestDataBuilder; import org.codelibs.fess.crawler.client.AbstractCrawlerClient; import org.codelibs.fess.crawler.client.AccessTimeoutTarget; import org.codelibs.fess.crawler.entity.RequestData; import org.codelibs.fess.crawler.entity.ResponseData; import org.codelibs.fess.crawler.exception.ChildUrlsException;
Registered: Sun Nov 10 03:50:12 UTC 2024 - Last Modified: Sat Oct 12 01:41:37 UTC 2024 - 25.7K bytes - Viewed (0)