- Sort Score
- Result 10 results
- Languages All
Results 61 - 70 of 363 for extractor (0.06 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
/** * Constructs a new CommandExtractor. */ public CommandExtractor() { // NOP } /* * (non-Javadoc) * * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream, * java.util.Map) */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) {Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 16.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java
* * <p> * This class provides methods to extract text from an input stream, handling different scenarios such as: * </p> * <ul> * <li>Normalizing text content</li> * <li>Handling resource names and content types</li> * <li>Retrying extraction without resource name or content type if the initial attempt fails</li> * <li>Extracting text from metadata if the main content extraction fails</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 30.8K bytes - Viewed (0) -
README.md
- Audio formats (MP3, WAV, FLAC) - Video formats (MP4, AVI, MOV) - Metadata extraction from media files ## Architecture ### Multi-Module Structure ``` fess-crawler-parent/ ├── fess-crawler/ # Core crawler framework │ ├── client/ # Protocol clients (HTTP, FTP, SMB, etc.) │ ├── extractor/ # Content extractors │ ├── transformer/ # Data transformers
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
* either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.util.regex.Pattern; /** * Extracts text content from XML documents. */ public class XmlExtractor extends AbstractXmlExtractor { /** * Creates a new XmlExtractor instance. */Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java
throw new ExtractException(e); } } return encoding; } /** * Extracts text content from the given content by removing tags and processing attributes. * @param content The content to extract from. * @return The extracted text. */ protected String extractString(final String content) { String input = content.replaceAll("[\\r\\n]", " ");
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 8.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler-opensearch/src/main/resources/crawler_opensearch.xml
<include path="crawler/client.xml"/> <include path="crawler/rule.xml"/> <include path="crawler/filter.xml"/> <include path="crawler/interval.xml"/> <include path="crawler/extractor.xml"/> <include path="crawler/mimetype.xml"/> <include path="crawler/encoding.xml"/> <include path="crawler/urlconverter.xml"/> <include path="crawler/log.xml"/> <include path="crawler/sitemaps.xml"/>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Nov 07 04:44:10 UTC 2024 - 2.2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/FilenameExtractorEnhancedTest.java
} /** * Test extraction with null parameters map. */ public void test_getText_withNullParams() { final InputStream in = new ByteArrayInputStream(new byte[0]); final ExtractData result = filenameExtractor.getText(in, null); assertNotNull(result); assertEquals("", result.getContent()); } /** * Test extraction with empty parameters map. */Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 7K bytes - Viewed (0) -
src/main/java/org/codelibs/fess/util/ComponentUtil.java
*/ public static IntervalControlHelper getIntervalControlHelper() { return getComponent(INTERVAL_CONTROL_HELPER); } /** * Gets the extractor factory component. * @return The extractor factory. */ public static ExtractorFactory getExtractorFactory() { return getComponent(EXTRACTOR_FACTORY); } /** * Gets a job executor by name.Registered: Sat Dec 20 09:19:18 UTC 2025 - Last Modified: Thu Jul 17 08:28:31 UTC 2025 - 28.9K bytes - Viewed (0)