Search Options

Results per page
Sort
Preferred Languages
Advance

Results 31 - 40 of 81 for Extractor (0.24 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/JodExtractor.java

            final Extractor extractor = getExtractor(outExt);
            if (extractor != null) {
                final Map<String, String> params = new HashMap<>();
                params.put(ExtractData.RESOURCE_NAME_KEY, outputFile.getName());
                try (final FileInputStream in = new FileInputStream(outputFile)) {
                    final ExtractData extractData = extractor.getText(in, params);
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 10.4K bytes
    - Viewed (0)
  2. src/main/java/org/codelibs/fess/crawler/transformer/FessStandardTransformer.java

        }
    
        /**
         * Gets the appropriate extractor for the given response data.
         * Selects an extractor based on the MIME type or falls back to the Tika extractor.
         *
         * @param responseData the response data containing the document to extract
         * @return the extractor instance for processing the document
         * @throws FessSystemException if no suitable extractor can be found
         */
        @Override
    Registered: Sat Dec 20 09:19:18 UTC 2025
    - Last Modified: Fri Nov 28 16:29:12 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java

            validateInputStream(in);
            try (final HSSFWorkbook workbook = new HSSFWorkbook(in);
                    final org.apache.poi.hssf.extractor.ExcelExtractor excelExtractor =
                            new org.apache.poi.hssf.extractor.ExcelExtractor(workbook)) {
                return new ExtractData(excelExtractor.getText());
            } catch (final IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java

         */
        @Override
        public ExtractData getText(final InputStream in, final Map<String, String> params) {
            validateInputStream(in);
            try (final org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(in)) {
                return new ExtractData(wordExtractor.getText());
            } catch (final IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 1.7K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/ExtractorFactoryTest.java

            final Extractor extractor = new Extractor() {
                public ExtractData getText(final InputStream in, final Map<String, String> params) {
                    return null;
                }
            };
    
            assertNull(extractorFactory.getExtractor("test"));
            extractorFactory.addExtractor("test", extractor);
            assertEquals(extractor, extractorFactory.getExtractor("test"));
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 6.9K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    import org.codelibs.core.io.ResourceUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Map;
    
    import org.apache.poi.hdgf.extractor.VisioTextExtractor;
    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  9. fess-crawler/src/test/resources/extractor/json/test.json

    {
      "title": "Sample Document",
      "author": "John Doe",
      "version": "1.0",
      "published": "2025-01-15",
      "tags": ["crawler", "extractor", "json"],
      "content": {
        "summary": "This is a sample JSON document for testing",
        "body": "The extractor should handle nested objects and arrays properly"
      },
      "metadata": {
        "created_at": "2025-01-01T00:00:00Z",
        "updated_at": "2025-01-15T12:00:00Z"
      }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 03:46:53 UTC 2025
    - 412 bytes
    - Viewed (0)
  10. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractorTest.java

     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     * either express or implied. See the License for the specific language
     * governing permissions and limitations under the License.
     */
    package org.codelibs.fess.crawler.extractor.impl;
    
    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;
    import org.codelibs.core.io.CloseableUtil;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 7.6K bytes
    - Viewed (0)
Back to top