- Sort Score
- Result 10 results
- Languages All
Results 51 - 60 of 81 for Extractor (0.04 sec)
-
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TarExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java
/* * (non-Javadoc) * * @see * org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getEncodingPattern() */ @Override protected Pattern getEncodingPattern() { return metaCharsetPattern; } /* * (non-Javadoc) * * @see org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor#getTagPattern() */ @OverrideRegistered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Oct 04 08:47:19 UTC 2025 - 9.3K bytes - Viewed (0) -
CLAUDE.md
``` 3. **Add to factory**: ```java clientFactory.addClient("^myprotocol://.*", myClient); ``` 4. **Add tests**: Unit + integration ### Adding a Content Extractor 1. **Implement `Extractor`**: ```java public class MyExtractor extends AbstractExtractor { @Override public ExtractData getText(InputStream in, Map<String, String> params) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 28 17:31:34 UTC 2025 - 10.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java
import com.fasterxml.jackson.databind.ObjectMapper; /** * PasswordBasedExtractor is an abstract base class for extractors that can handle password-protected files. * It provides functionality to manage passwords for different file patterns using regular expressions. * * <p>The extractor supports two types of password management: * <ul> * <li>Static passwords configured via {@link #addPassword(String, String)}</li>
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPublisherExtractor.java
* governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.IOException; import java.io.InputStream; import java.util.Map; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 2K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractorTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 2.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java
/** * Constructs a new CommandExtractor. */ public CommandExtractor() { // NOP } /* * (non-Javadoc) * * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream, * java.util.Map) */ @Override public ExtractData getText(final InputStream in, final Map<String, String> params) {Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 12:19:14 UTC 2025 - 16.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractor.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.util.regex.Pattern; /** * Extracts text content from XML documents. */ public class XmlExtractor extends AbstractXmlExtractor { /** * Creates a new XmlExtractor instance.Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 2.6K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TextExtractorEnhancedTest.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import org.codelibs.core.io.ResourceUtil; import org.codelibs.fess.crawler.container.StandardCrawlerContainer;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 8.9K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MarkdownExtractor.java
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.crawler.extractor.impl; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.logging.log4j.LogManager;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Nov 23 03:46:53 UTC 2025 - 8.2K bytes - Viewed (0)