unparse - Code Search

fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

     * @param stream the input stream to parse
     * @return the parsed RobotsTxt object, or null if disabled
     */
    public RobotsTxt parse(final InputStream stream) {
        // The caller supplied no encoding: hand off to the charset-aware overload
        // using the Constants.UTF_8 default and return whatever it produces.
        final RobotsTxt robotsTxt = parse(stream, Constants.UTF_8);
        return robotsTxt;
    }

    /**
     * Parses a robots.txt file from the given input stream using the specified character encoding.
     * @param stream the input stream to parse
     * @param charsetName the character encoding to use

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 7.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/SitemapsHelper.java

     */
    public SitemapSet parse(final InputStream in) {
        // Public entry point: delegate to the two-argument overload with the
        // recursive flag switched on (recursive parsing of compressed files).
        final boolean recursive = true;
        return parse(in, recursive);
    }

    /**
     * Parses a sitemap from the given input stream.
     * @param in the input stream to parse
     * @param recursive whether to recursively parse compressed files
     * @return the parsed sitemap set
     */
    protected SitemapSet parse(final InputStream in, final boolean recursive) {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 14.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

        } finally {
            FileUtil.deleteInBackground(tempFile);
        }
    }

    /**
     * Creates a parse context.
     *
     * @param parser A parser.
     * @param params A map of parameters.
     * @return a parse context.
     */
    protected ParseContext createParseContext(final Parser parser, final Map<String, String> params) {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 30.7K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/util/XmlUtil.java

            } catch (final Exception e) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Failed to set a property.", e);
                }
            }
            // parse a content
            parser.parse(is, handler);

            return handler.getDataMap();
        } catch (final Exception e) {
            throw new CrawlerSystemException("Could not create a data map from XML content.", e);
        }

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 9.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

 * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
 * and adds them as child URLs to be crawled.
 *
 * <p>
 * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
 * It then iterates through the sitemaps in the SitemapSet, extracts the URL
 * from each sitemap, and creates a new {@link RequestData} object for each URL.

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 3.4K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/client/fs/FileSystemClient.java

            File file = null;
            try {
                file = new File(new URI(filePath));
            } catch (final URISyntaxException e) {
                logger.warn("Could not parse url: " + filePath, e);
            }

            if (file == null) {
                responseData.setHttpStatusCode(Constants.NOT_FOUND_STATUS_CODE);
                responseData.setCharSet(charset);

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 13.8K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

        final DOMParser parser = getDomParser();
        try (final Reader reader = new StringReader(content)) {
            parser.parse(new InputSource(reader));
        } catch (final Exception e) {
            logger.warn("Failed to parse the content.", e);
            return new ExtractData(extractString(content));
        }

        final Document document = parser.getDocument();
        try {

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 9.3K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

            if (responseData.getCharSet() != null) {
                is.setEncoding(responseData.getCharSet());
            }
            parser.parse(is);
        } catch (final Exception e) {
            throw new CrawlingAccessException("Could not parse " + responseData.getUrl(), e);
        }
        final Document document = parser.getDocument();

        final StringBuilder buf = new StringBuilder(1000);

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 13.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PasswordBasedExtractor.java

                                .collect(Collectors.toList());
                    } catch (final Exception e) {
                        logger.warn("Failed to parse passwords for " + url, e);
                        list = Collections.emptyList();
                    }
                    configPasswordMap.put(value, list);
                }

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Thu Aug 07 02:55:08 UTC 2025

- 5.1K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                        }
                    }
                }
            } catch (final IOException e) {
                logger.warn("Failed to parse annotation.", e);
            }
        }
    }

    /**
     * Extracts text from an embedded file using the appropriate extractor.
     * @param filename the filename of the embedded file

Registered: Sun Sep 21 03:50:09 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 12.7K bytes

- Viewed (0)

Search Options