Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 28 for urlset (0.03 sec)

  1. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/ftp/FtpClientTest.java

                fail();
            } catch (final ChildUrlsException e) {
                final Set<RequestData> urlSet = e.getChildUrlList();
                assertEquals(1, urlSet.size());
                for (final RequestData requestData : urlSet.toArray(new RequestData[urlSet.size()])) {
                    String url = requestData.getUrl();
                    assertTrue(url.contains("dir1/test3.txt"));
                }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 18K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/SitemapsHelper.java

                if (preloadDate.indexOf("<urlset") >= 0) {
                    // XML Sitemaps
                    bis.reset();
                    return parseXmlSitemaps(bis);
                }
                if (preloadDate.indexOf("<sitemapindex") >= 0) {
                    // XML Sitemaps Index
                    bis.reset();
                    return parseXmlSitemapsIndex(bis);
                }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 14.7K bytes
    - Viewed (0)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/CrawlerContextTest.java

         */
        public void test_robotsTxtUrlSet() {
            Set<String> urlSet = crawlerContext.getRobotsTxtUrlSet();
            assertNotNull(urlSet);
            assertTrue(urlSet instanceof LruHashSet);
            assertTrue(urlSet.isEmpty());
    
            // Add URLs to default set
            urlSet.add("http://example.com/robots.txt");
            urlSet.add("http://test.com/robots.txt");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 25.6K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/processor/impl/DefaultResponseProcessor.java

            }
            return true;
        }
    
        /**
         * Stores child URLs found in the response data.
         *
         * @param crawlerContext the crawler context
         * @param childUrlList the set of child URLs
         * @param url the parent URL
         * @param depth the depth of the child URLs
         * @param encoding the encoding of the child URLs
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 12.5K bytes
    - Viewed (0)
  5. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerThread.java

                return;
            }
    
            // add url and filter
            final Set<String> urlSet = new HashSet<>();
            final List<UrlQueue<?>> childList = childUrlList.stream()
                    .filter(d -> StringUtil.isNotBlank(d.getUrl()) && urlSet.add(d.getUrl()) && crawlerContext.urlFilter.match(d.getUrl()))
                    .map(d -> {
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 20.4K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            StatefulTransformer transformer = new StatefulTransformer("statefulTransformer");
    
            // Process multiple URLs
            String[] urls = { "http://example1.com", "http://example2.com", "http://example3.com" };
    
            for (String url : urls) {
                ResponseData responseData = new ResponseData();
                responseData.setUrl(url);
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/client/smb/SmbClientTest.java

            } catch (final ChildUrlsException e) {
                String[] urls = e.getChildUrlList().stream().map(r -> r.getUrl()).sorted().toArray(String[]::new);
                assertEquals(3, urls.length);
                assertEquals(baseUrl + "dir1/", urls[0]);
                assertEquals(baseUrl + "dir3/", urls[1]);
                assertEquals(baseUrl + "file1.txt", urls[2]);
            }
            try {
                smbClient.doGet(baseUrl + "dir1/");
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 13.7K bytes
    - Viewed (0)
  8. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/HtmlTransformer.java

            }
            return null;
        }
    
        /**
         * Extracts URLs from HTML tag attributes using XPath.
         *
         * @param url the base URL for resolving relative URLs
         * @param document the document to extract URLs from
         * @param xpath the XPath expression to select elements
         * @param attr the attribute name to extract URLs from
         * @param encoding the character encoding to use
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 28.5K bytes
    - Viewed (0)
  9. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/FileTransformer.java

            this.path = path;
        }
    
        /**
         * Returns the string used to replace question marks in URLs.
         *
         * @return the currently configured question mark replacement string
         */
        public String getQuestionStr() {
            return this.questionStr;
        }
    
        /**
         * Sets the replacement string for question marks in URLs.
         *
         * @param questionStr the question mark replacement string to set
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 11.7K bytes
    - Viewed (0)
  10. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractor.java

                                InputStream in = null;
                                try {
                                    if (isByteStream) {
                                        inputStream.reset();
                                        in = inputStream;
                                    } else {
                                        in = new FileInputStream(tempFile);
                                    }
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.7K bytes
    - Viewed (0)
Back to top