Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 21 - 30 of 268 for Extract (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/Extractor.java

     * optionally override the default weight value.
     */
    public interface Extractor {
    
        /**
         * Extracts text data from the given input stream.
         *
         * @param in the input stream to extract text from
         * @param params a map of parameters to be used during extraction
         * @return an ExtractData object containing the extracted text
         */
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sat Mar 15 06:52:00 UTC 2025
    - 1.6K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/ZipExtractor.java

                            } catch (final Exception e) {
                                failedEntries++;
                                if (logger.isDebugEnabled()) {
                                    logger.debug("Failed to extract content from archive entry: {}", filename, e);
                                }
                            }
                        }
                    }
                }
            } catch (final MaxLengthExceededException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 4.8K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsExcelExtractor.java

                return new ExtractData(excelExtractor.getText());
            } catch (final IOException e) {
                throw new ExtractException("Failed to extract text from Excel document.", e);
            }
        }
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Mon Nov 24 03:59:47 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsWordExtractor.java

                return new ExtractData(wordExtractor.getText());
            } catch (final IOException e) {
                throw new ExtractException("Failed to extract text from Word document.", e);
            }
        }
    
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Wed Nov 19 08:55:01 UTC 2025
    - 1.7K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/TextTransformer.java

    import jakarta.annotation.Resource;
    
    /**
     * TextTransformer is a class that transforms a ResponseData object into a ResultData object containing the extracted text content.
     * It uses an Extractor to extract the text from the response body based on the MIME type.
     * The extracted text is then converted into a byte array using the specified charset encoding.
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 6.5K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TarExtractor.java

                            } catch (final Exception e) {
                                failedEntries++;
                                if (logger.isDebugEnabled()) {
                                    logger.debug("Failed to extract content from archive entry: {}", filename, e);
                                }
                            }
                        }
                    }
                }
            } catch (final MaxLengthExceededException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 5.1K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsVisioExtractor.java

            if (in == null) {
                throw new CrawlerSystemException("Microsoft Visio input stream is null. Cannot extract text from null input.");
            }
            try {
                @SuppressWarnings("resource")
                final VisioTextExtractor visioTextExtractor = new VisioTextExtractor(in);
                return new ExtractData(visioTextExtractor.getText());
            } catch (final IOException e) {
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Sun Nov 23 12:19:14 UTC 2025
    - 1.9K bytes
    - Viewed (0)
  8. ci/official/utilities/repack_libtensorflow.sh

    # Returns:
    #   None
    function cp_normalized_srcjar() {
      src_jar="$1"
      dest_jar="$2"
      tmp_dir=$(mktemp -d)
      cp "${src_jar}" "${tmp_dir}/orig.jar"
      pushd "${tmp_dir}"
      # Extract any src/ files
      jar -xf "${tmp_dir}/orig.jar" src/
      # Extract any org/ files under src/main/java
      (mkdir -p src/main/java && cd src/main/java && jar -xf "${tmp_dir}/orig.jar" org/)
      # Repackage src/
      jar -cMf "${tmp_dir}/new.jar" src
      popd
    Registered: Tue Dec 30 12:39:10 UTC 2025
    - Last Modified: Fri Jan 17 16:25:18 UTC 2025
    - 5.7K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/gcs/Handler.java

             * This constructor parses the URL to extract bucket and object names.
             *
             * @param url The GCS URL to connect to
             */
            protected GcsURLConnection(final URL url) {
                super(url);
                // Extract bucket name from host
                bucketName = url.getHost() != null ? url.getHost() : StringUtil.EMPTY;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 08:38:29 UTC 2025
    - 9.6K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/net/protocol/s3/Handler.java

             * This constructor parses the URL to extract bucket and object names.
             *
             * @param url The S3 URL to connect to
             */
            protected S3URLConnection(final URL url) {
                super(url);
                // Extract bucket name from host
                bucketName = url.getHost() != null ? url.getHost() : StringUtil.EMPTY;
    Registered: Sat Dec 20 11:21:39 UTC 2025
    - Last Modified: Thu Dec 11 07:57:44 UTC 2025
    - 9.5K bytes
    - Viewed (0)
Back to top