Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 7 of 7 for Record (0.15 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/helper/RobotsTxtHelper.java

        /** Pattern for parsing allow records. */
        protected static final Pattern ALLOW_RECORD = Pattern.compile("^allow:\\s*([^\\s]*)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /** Pattern for parsing crawl-delay records. */
        protected static final Pattern CRAWL_DELAY_RECORD = Pattern.compile("^crawl-delay:\\s*([^\\s]+)\\s*$", Pattern.CASE_INSENSITIVE);
    
        /**
         * Pattern for Sitemap record.
         */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 7.7K bytes
    - Viewed (0)
  2. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <_comment>SAS XPORT Transfer File</_comment>
        <glob pattern="*.xpt"/>
        <glob pattern="*.xport"/>
        <magic priority="40">
          <match value="HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" offset="0" />
        </magic>
      </mime-type>
      <mime-type type="application/x-spss-sav">
        <_comment>SPSS Data File</_comment>
        <glob pattern="*.sav"/>
        <magic priority="50">
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (2)
  3. fess-crawler/src/test/java/org/codelibs/fess/crawler/filter/UrlFilterTest.java

            urlFilter.addInclude("https://first.com/.*");
    
            // Second initialization with same session ID
            urlFilter.init(sessionId);
            urlFilter.addInclude("https://second.com/.*");
    
            // Both patterns should work
            assertTrue(urlFilter.match("https://first.com/page"));
            assertTrue(urlFilter.match("https://second.com/page"));
        }
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Wed Sep 03 14:42:53 UTC 2025
    - 19K bytes
    - Viewed (0)
  4. README.md

    crawler.crawlerContext.setNumOfThread(10);
    
    // Set maximum crawl depth
    crawler.crawlerContext.setMaxDepth(3);
    
    // Set request interval (politeness)
    crawler.crawlerContext.setDefaultIntervalTime(1000); // 1 second
    ```
    
    ### URL Filtering
    
    ```java
    // Include patterns
    crawler.urlFilter.addInclude("https://example.com/.*");
    crawler.urlFilter.addInclude(".*\\.pdf$");
    
    // Exclude patterns  
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  5. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/impl/AbstractTransformerTest.java

            assertEquals("name\nwith\nnewlines", testTransformer.getName());
        }
    
        /**
         * Test multiple name changes
         */
        public void test_multipleNameChanges() {
            String[] names = { "first", "second", "third", "fourth", "fifth" };
    
            for (String name : names) {
                testTransformer.setName(name);
                assertEquals(name, testTransformer.getName());
            }
    
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 20.8K bytes
    - Viewed (0)
  6. fess-crawler/src/test/java/org/codelibs/fess/crawler/rule/RuleManagerTest.java

            // Remove one instance
            assertTrue(ruleManager.removeRule(rule));
            assertEquals(1, ruleManager.getRuleCount());
            assertTrue(ruleManager.hasRule(rule));
    
            // Remove second instance
            assertTrue(ruleManager.removeRule(rule));
            assertEquals(0, ruleManager.getRuleCount());
            assertFalse(ruleManager.hasRule(rule));
        }
    
        /**
         * Test empty RuleManager
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 23.8K bytes
    - Viewed (0)
  7. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

            // Create chain of transformers
            List<Transformer> transformerChain = new ArrayList<>();
            transformerChain.add(new TestTransformer("first"));
            transformerChain.add(new TestTransformer("second"));
            transformerChain.add(new TestTransformer("third"));
    
            ResponseData responseData = new ResponseData();
            responseData.setUrl("http://example.com");
    
            ResultData currentResult = null;
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
Back to top