Search Options

Results per page
Sort
Preferred Languages
Advance

Results 91 - 100 of 239 for Rules (0.03 sec)

  1. src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

        private static final int UTF8_BOM_SIZE = 3;
    
        /** Flag indicating whether content should be pruned */
        public boolean prunedContent = true;
    
        /** Map containing URL conversion rules (regex patterns to replacement strings) */
        protected Map<String, String> convertUrlMap = new LinkedHashMap<>();
    
        /** Fess configuration instance */
        protected FessConfig fessConfig;
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 54.4K bytes
    - Viewed (0)
  2. fess-crawler/src/main/java/org/codelibs/fess/crawler/service/impl/UrlFilterServiceImpl.java

    import org.codelibs.fess.crawler.service.UrlFilterService;
    
    import jakarta.annotation.Resource;
    
    /**
     * Implementation of the {@link UrlFilterService} interface.
     * This class provides methods for managing URL filtering rules,
     * including adding include and exclude URL patterns, deleting patterns,
     * and retrieving lists of compiled URL patterns. It utilizes a
     * {@link MemoryDataHelper} to store and manage the URL patterns in memory.
     *
     */
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 4.2K bytes
    - Viewed (0)
  3. fess-crawler/src/main/java/org/codelibs/fess/crawler/transformer/impl/XpathTransformer.java

    import org.xml.sax.InputSource;
    
    /**
     * {@link XpathTransformer} is a class that transforms HTML content into XML format based on XPath expressions.
     * It extracts data from an HTML document by applying XPath rules defined in {@link #fieldRuleMap}.
     * The extracted data is then formatted into an XML structure and stored in the {@link ResultData}.
     * <p>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 13.1K bytes
    - Viewed (0)
  4. fess-crawler/src/main/java/org/codelibs/fess/crawler/CrawlerContext.java

        /**
         * Filter for URLs to control which URLs are processed.
         */
        protected UrlFilter urlFilter;
    
        /**
         * Manager for crawling rules and configurations.
         */
        protected RuleManager ruleManager;
    
        /**
         * Controller for managing crawling intervals and delays.
         */
        protected IntervalController intervalController;
    
        /**
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 8.9K bytes
    - Viewed (0)
  5. src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

                ComponentUtil.getCrawlingConfigHelper().remove(sid);
                deleteCrawlData(sid);
            }
        }
    
        /**
         * Gets the list of available boost document rules.
         *
         * @return List of boost document rules that are currently available
         */
        protected List<BoostDocumentRule> getAvailableBoostDocumentRuleList() {
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Jul 17 08:28:31 UTC 2025
    - 24.9K bytes
    - Viewed (0)
  6. okhttp/src/commonJvmAndroid/kotlin/okhttp3/internal/concurrent/TaskQueue.kt

          for (futureTask in futureTasks) {
            if (futureTask is AwaitIdleTask) {
              return futureTask.latch
            }
          }
    
          // Don't delegate to schedule() because that enforces shutdown rules.
          val newTask = AwaitIdleTask()
          if (scheduleAndDecide(newTask, 0L, recurrence = false)) {
            taskRunner.kickCoordinator(this)
          }
          return newTask.latch
        }
      }
    
    Registered: Fri Sep 05 11:42:10 UTC 2025
    - Last Modified: Wed May 28 23:28:25 UTC 2025
    - 7.3K bytes
    - Viewed (0)
  7. src/main/java/org/codelibs/fess/util/GsaConfigParser.java

     *
     * <p>The parser handles the following GSA configuration elements:
     * <ul>
     * <li>Collections with good/bad URL patterns</li>
     * <li>Global parameters including start URLs and filtering rules</li>
     * <li>User agent settings</li>
     * <li>URL pattern matching with regular expressions and contains filters</li>
     * </ul>
     *
     */
    public class GsaConfigParser extends DefaultHandler {
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Thu Aug 07 03:06:29 UTC 2025
    - 21.5K bytes
    - Viewed (0)
  8. fess-crawler/src/test/java/org/codelibs/fess/crawler/transformer/TransformerTest.java

                    return null;
                }
    
                ResultData resultData = new ResultData();
                resultData.setTransformerName(name);
                // Apply transformation rules
                try (InputStream is = responseData.getResponseBody()) {
                    byte[] bytes = is.readAllBytes();
                    String content = new String(bytes);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 28K bytes
    - Viewed (0)
  9. src/test/java/org/codelibs/fess/helper/SambaHelperTest.java

                    return name;
                }
            });
            sambaHelper.init();
    
            // Test with Turkish locale characters that have special lowercase rules
            assertEquals("1üser", sambaHelper.getAccountId(createMockSID(1, "Üser")));
        }
    
        public void test_getAccountId_exception_handling() throws SmbException {
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Sat Jul 19 23:49:30 UTC 2025
    - 14.7K bytes
    - Viewed (0)
  10. src/main/java/org/codelibs/fess/opensearch/config/exentity/CrawlingConfig.java

                public static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
                public static final String SCRIPT_TYPE = "script.type";
                public static final String HTML_CHILD_URL_RULES = "html.child.url.rules";
                public static final String CRAWL_ORDER = "crawl.order";
            }
    
            // meta.*
            // meta.<field>=<value>
    
            // value.*
            // value.<field>=<value>
    
    Registered: Thu Sep 04 12:52:25 UTC 2025
    - Last Modified: Sat Mar 15 06:53:53 UTC 2025
    - 5.6K bytes
    - Viewed (0)
Back to top