- Sort Score
- Result 10 results
- Languages All
Results 61 - 70 of 81 for Extractor (0.16 sec)
-
fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml
<include path="crawler/client.xml"/> <include path="crawler/rule.xml"/> <include path="crawler/filter.xml"/> <include path="crawler/interval.xml"/> <include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Nov 07 04:44:10 UTC 2024 - 2.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TextExtractor.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.Map; import org.codelibs.core.io.InputStreamUtil; import org.codelibs.fess.crawler.Constants; import org.codelibs.fess.crawler.entity.ExtractData;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 2K bytes - Viewed (0) -
README.md
### Multi-Module Structure ``` fess-crawler-parent/ ├── fess-crawler/ # Core crawler framework │ ├── client/ # Protocol clients (HTTP, FTP, SMB, etc.) │ ├── extractor/ # Content extractors │ ├── transformer/ # Data transformers │ └── service/ # Core services ├── fess-crawler-lasta/ # LastaFlute DI integration └── fess-crawler-opensearch/ # OpenSearch backend
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java
import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.extractor.ExtractorFactory; import org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl; import org.dbflute.utflute.core.PlainTestCase; /** * Test class for archive extractor error handling improvements. * Tests partial extraction, error recovery, and improved error messages. */
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JsonExtractor.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 9.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CsvExtractor.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 12.8K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
import org.apache.logging.log4j.Logger; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor; import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 30.8K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/util/ComponentUtil.java
*/ public static IntervalControlHelper getIntervalControlHelper() { return getComponent(INTERVAL_CONTROL_HELPER); } /** * Gets the extractor factory component. * @return The extractor factory. */ public static ExtractorFactory getExtractorFactory() { return getComponent(EXTRACTOR_FACTORY); } /** * Gets a job executor by name.Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 28.9K bytes - Viewed (0) -
fess-crawler-lasta/src/main/resources/crawler.xml
<include path="crawler/container.xml"/> <include path="crawler/client.xml"/> <include path="crawler/rule.xml"/> <include path="crawler/filter.xml"/> <include path="crawler/interval.xml"/> <include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Tue Nov 28 13:40:25 UTC 2017 - 1.7K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractorEnhancedTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 7K bytes - Viewed (0)