Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 31 - 40 of 59 for called (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.java

            final DOMParser parser = getDomParser();
            try (final Reader reader = new StringReader(content)) {
                parser.parse(new InputSource(reader));
            } catch (final Exception e) {
                logger.warn("Failed to parse the content.", e);
                return new ExtractData(extractString(content));
            }
    
            final Document document = parser.getDocument();
            try {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.3K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/SitemapsResponseProcessor.java

    import jakarta.annotation.Resource;
    
    /**
     * A response processor implementation that handles sitemaps.
     * It parses the response body as a SitemapSet, extracts URLs from the sitemaps,
     * and adds them as child URLs to be crawled.
     *
     * <p>
     * This class uses a {@link SitemapsHelper} to parse the sitemap XML or text.
     * It then iterates through the sitemaps in the SitemapSet, extracts the URL
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 3.4K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/util/XmlUtil.java

                    parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, StringUtil.EMPTY);
                } catch (final Exception e) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Failed to set a property.", e);
                    }
                }
                // parse a content
                parser.parse(is, handler);
    
                return handler.getDataMap();
            } catch (final Exception e) {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 9.4K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/rule/impl/SitemapsRule.java

                } catch (final CrawlingAccessException e) {
                    throw e;
                } catch (final Exception e) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Failed a sitemap check: {}", responseData, e);
                    }
                }
            }
    
            return false;
        }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.6K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ResponseData.java

            mimeType = contentType;
        }
    
        /**
         * Returns the URL of the crawled resource held by this instance.
         *
         * @return the URL currently stored in the {@code url} field
         */
        public String getUrl() {
            return this.url;
        }
    
        /**
         * Sets the URL of the crawled resource.
         *
         * @param url the URL to set
         */
        public void setUrl(final String url) {
            this.url = url;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 11.6K bytes
    - Viewed (0)
  6. fess-crawler/src/main/java/org/codelibs/fess/crawler/entity/ExtractData.java

    import org.apache.tika.metadata.Message;
    import org.apache.tika.metadata.TIFF;
    import org.apache.tika.metadata.TikaCoreProperties;
    import org.apache.tika.metadata.TikaMimeKeys;
    
    /**
     * Represents extracted data from a crawled resource, including content and metadata.
     */
    public class ExtractData
            implements TikaCoreProperties, CreativeCommons, Geographic, HttpHeaders, Message, ClimateForcast, TIFF, TikaMimeKeys, Serializable {
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 3.8K bytes
    - Viewed (0)
  7. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/CrawlerClientCreator.java

                crawlerClientFactory.addClient(regex, client);
            } catch (final Exception e) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Failed to create {}.", componentName, e);
                } else {
                    logger.info("{} is not available.", componentName);
                }
            }
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.5K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/client/http/HcHttpClient.java

                        consumer.accept(response, httpEntity);
                    } catch (final Exception e) {
                        request.abort();
                        logger.warn("Failed to authenticate on " + scheme, e);
                    } finally {
                        EntityUtils.consumeQuietly(httpEntity);
                    }
                });
            });
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 52.2K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

                            }
                        }
                    }
                } catch (final IOException e) {
                    logger.warn("Failed to parse annotation.", e);
                }
            }
        }
    
        /**
         * Extracts text from an embedded file using the appropriate extractor.
         * @param filename the filename of the embedded file
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 12.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

    import org.codelibs.fess.crawler.entity.ResponseData;
    import org.codelibs.fess.crawler.entity.ResultData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    
    /**
     * <p>
     * FileTransformer stores the content of a crawled resource as a file on the file system.
     * It extends HtmlTransformer and provides functionality to:
     * </p>
     * <ul>
     *     <li>Specify a base directory for storing files.</li>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
Back to top