- Sort Score
- Result 10 results
- Languages All
Results 41 - 49 of 49 for ExtractData (0.06 sec)
-
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java
public TarExtractor() { super(); } @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { validateInputStream(in); final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper(); final ExtractorFactory extractorFactory = getExtractorFactory(); return new ExtractData(getTextInternal(in, mimeTypeHelper, extractorFactory)); } /**
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Dec 11 08:38:29 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java
* @return The password. */ protected String getPassword(final Map<String, String> params) { final String url = params != null ? params.get(ExtractData.URL) : null; if (!passwordMap.isEmpty()) { final String resourceName = params != null ? params.get(ExtractData.RESOURCE_NAME_KEY) : null; String value = null; if (StringUtil.isNotEmpty(url)) { value = url;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Thu Aug 07 02:55:08 UTC 2025 - 5.1K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractorTest.java
import org.apache.logging.log4j.Logger; import org.codelibs.core.io.CloseableUtil; import org.codelibs.core.io.ResourceUtil; import org.codelibs.fess.crawler.container.StandardCrawlerContainer; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.dbflute.utflute.core.PlainTestCase; /** * @author shinsuke * */ public class HtmlExtractorTest extends PlainTestCase {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Mar 15 06:52:00 UTC 2025 - 3.7K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ApiExtractor.java
accessTimeoutTask = TimeoutManager.getInstance().addTimeoutTarget(accessTimeoutTarget, accessTimeout, false); } final ExtractData data = new ExtractData(); final HttpPost httpPost = new HttpPost(url); final HttpEntity postEntity = MultipartEntityBuilder.create() .setMode(HttpMultipartMode.BROWSER_COMPATIBLE)
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 12.2K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java
} final Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType()); final Map<String, String> params = new HashMap<>(); params.put(ExtractData.RESOURCE_NAME_KEY, getResourceName(responseData)); params.put(ExtractData.CONTENT_TYPE, responseData.getMimeType()); String content = null; try (final InputStream in = responseData.getResponseBody()) {
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Jul 06 02:13:03 UTC 2025 - 6.5K bytes - Viewed (0) -
fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlXpathExtractor.java
import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; import javax.xml.xpath.XPathNodes; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.crawler.entity.ExtractData; import org.codelibs.fess.crawler.exception.CrawlerSystemException; import org.codelibs.fess.crawler.exception.ExtractException; import org.codelibs.fess.crawler.util.XPathAPI; import org.codelibs.nekohtml.parsers.DOMParser;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sat Oct 04 08:47:19 UTC 2025 - 10.4K bytes - Viewed (0) -
fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/AbstractExtractorTest.java
private InputStream lastValidatedStream = null; @Override public ExtractData getText(final InputStream in, final Map<String, String> params) { validateInputStream(in); validateCalled = true; lastValidatedStream = in; return new ExtractData("test content"); } public boolean isValidateCalled() { return validateCalled;
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Mon Nov 24 03:59:47 UTC 2025 - 8.4K bytes - Viewed (0) -
CLAUDE.md
### Adding a Content Extractor 1. **Implement `Extractor`**: ```java public class MyExtractor extends AbstractExtractor { @Override public ExtractData getText(InputStream in, Map<String, String> params) { ExtractData data = new ExtractData(); // Extract text data.setContent(extractedText); return data; } } ``` 2. **Register**: ```xml
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Fri Nov 28 17:31:34 UTC 2025 - 10.7K bytes - Viewed (0) -
README.md
```java public class CustomExtractor extends AbstractExtractor { @Override public ExtractData getText(final InputStream inputStream, final Map<String, String> params) { // Custom extraction logic ExtractData extractData = new ExtractData(); // ... implementation return extractData; } } // Register custom extractor
Registered: Sat Dec 20 11:21:39 UTC 2025 - Last Modified: Sun Aug 31 05:32:52 UTC 2025 - 15.3K bytes - Viewed (0)