extraction - Code Search

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractor.java

    /**
     * Constructs a new CommandExtractor.
     * <p>
     * The constructor body is intentionally empty: it adds no initialization
     * logic of its own.
     */
    public CommandExtractor() {
        // NOP (no operation): nothing is done in this constructor body
    }

    /*
     * (non-Javadoc)
     *
     * @see org.codelibs.fess.crawler.extractor.Extractor#getText(java.io.InputStream,
     * java.util.Map)
     */
    @Override
    public ExtractData getText(final InputStream in, final Map<String, String> params) {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 16K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/client/ftp/FtpClient.java

    }

    /**
     * Retrieves response data from the FTP server for the specified URI.
     * This method handles the actual FTP operations including directory listing,
     * file retrieval, and metadata extraction.
     *
     * @param uri The URI to retrieve data from
     * @param includeContent Whether to include the actual content in the response
     * @return The response data containing the retrieved information

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 39.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler-lasta/src/main/resources/crawler/extractor.xml

		class="org.codelibs.fess.crawler.extractor.impl.MsPowerPointExtractor" />
	<component name="msPublisherExtractor"
		class="org.codelibs.fess.crawler.extractor.impl.MsPublisherExtractor" />
	<component name="msVisioExtractor"
		class="org.codelibs.fess.crawler.extractor.impl.MsVisioExtractor" />
	<component name="pdfExtractor"
		class="org.codelibs.fess.crawler.extractor.impl.PdfExtractor" />
	<component name="lhaExtractor"

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Aug 01 21:40:30 UTC 2020

- 49K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/helper/impl/MimeTypeHelperImplTest.java

        assertContentType("application/pdf", "extractor/test.pdf", "hoge.pdf");

        assertContentType("application/gzip", "extractor/gz/test.tar.gz", "hoge.tar.gz");
        assertContentType("application/zip", "extractor/zip/test.zip", "hoge.zip");
        assertContentType("application/x-lharc", "extractor/lha/test.lzh", "hoge.lzh"); // TODO is it correct?

        assertContentType("application/xml", "extractor/test.mm", "hoge.mm");

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 11.6K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/ExtractorFactory.java

     * If no extractor exists for the key, a new array containing the extractor is created and associated with the key.
     *
     * @param key       The key associated with the extractor. Must not be null or blank.
     * @param extractor The extractor to add. Must not be null.
     */
    public void addExtractor(final String key, final Extractor extractor) {
        if (StringUtil.isBlank(key)) {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 7.3K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractorTest.java

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.compressors.CompressorStreamFactory;

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 3.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/CommandExtractorTest.java

        final CommandExtractor extractor = new CommandExtractor();
        extractor.executionTimeout = 1000L;
        extractor.command = getCommand(scriptFile);
        final Map<String, String> params = new HashMap<String, String>();
        try {
            final ExtractData data = extractor.getText(new FileInputStream(contentFile), params);
            fail(data.toString());

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 9.8K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.codelibs.fess.crawler.entity.ExtractData;

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 2.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/JodExtractorTest.java

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.io.CloseableUtil;

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 9.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/XmlExtractorTest.java

        final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_sjis.xml");
        final String content = xmlExtractor.getText(in, null).getContent();
        CloseableUtil.closeQuietly(in);
        logger.info(content);
        assertTrue(content.contains("テスト"));
    }

    public void test_getXml_entity() {
        final InputStream in = ResourceUtil.getResourceAsStream("extractor/test_entity.xml");

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sat Mar 15 06:52:00 UTC 2025

- 6.4K bytes

- Viewed (0)

Search Options