Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 7 of 7 for QuantizeMultiplier (0.28 sec)

  1. tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc

      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0)), 1.0);
      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-1)), 1.0e-1);
      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-2)), 1.0e-2);
      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-3)), 1.0e-3);
      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-4)), 1.0e-4);
      ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-5)), 1.0e-5);
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Mon Apr 08 01:38:03 UTC 2024
    - 3.9K bytes
    - Viewed (0)
  2. tensorflow/compiler/mlir/quantization/stablehlo/utils/math_utils_test.cc

      int32_t shift;
    
      EXPECT_TRUE(succeeded(QuantizeMultiplier(1.2, quantized_fraction, shift)));
      EXPECT_EQ(quantized_fraction, 19661);
      EXPECT_EQ(shift, 1);
    
      EXPECT_TRUE(succeeded(QuantizeMultiplier(15.5, quantized_fraction, shift)));
      EXPECT_EQ(quantized_fraction, 31744);
      EXPECT_EQ(shift, 4);
    
      EXPECT_TRUE(succeeded(QuantizeMultiplier(1, quantized_fraction, shift)));
      EXPECT_EQ(quantized_fraction, 16384);
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Fri Apr 26 05:58:41 UTC 2024
    - 2K bytes
    - Viewed (0)
  3. tensorflow/compiler/mlir/quantization/stablehlo/utils/math_utils.h

    namespace mlir::quant::stablehlo {
    
    // Decomposes a given floating point value num into a normalized and quantized
    // fraction and an integral power of two.
    LogicalResult QuantizeMultiplier(double double_multiplier,
                                     int32_t& quantized_fraction, int32_t& shift);
    
    }  // namespace mlir::quant::stablehlo
    
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Mon Sep 18 07:43:59 UTC 2023
    - 1.3K bytes
    - Viewed (0)
  4. tensorflow/compiler/mlir/quantization/stablehlo/utils/math_utils.cc

    // https://github.com/tensorflow/tensorflow/blob/f5c9cbb1c462912030bd845244118f952cbbbd5e/tensorflow/core/kernels/uniform_quant_ops/math_utils.cc#L29
    // And then modified so that it doesn't overflow int32 values.
    LogicalResult QuantizeMultiplier(double double_multiplier,
                                     int32_t& quantized_fraction, int32_t& shift) {
      if (!std::isfinite(double_multiplier) || double_multiplier <= 0) {
        return failure();
      }
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Mar 05 08:32:43 UTC 2024
    - 2.1K bytes
    - Viewed (0)
  5. tensorflow/compiler/mlir/lite/quantization/numerical_utils.h

    // Decompose double precision multiplier to integer multiplier and exponent.
    //    double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
    // int_multiplier will be in the range [2^30, 2^31).
    QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
    
    // Calculate the effective quantized value range for the scale, zero point. The
    // range is the minimum range defined by [rmin, rmax] and [qmin, qmax].
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Jun 07 18:43:51 UTC 2022
    - 1.8K bytes
    - Viewed (0)
  6. tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc

    //   multiplier and the shift amount. The shift amount is the number of bits
    //   that the quantized multiplier should be shifted to the right before being
    //   used.
    QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
      if (double_multiplier < 1e-6) {
        return {0, 0};
      }
    
      int32_t shift;
      const double q = frexp(double_multiplier, &shift);
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Oct 17 19:57:04 UTC 2023
    - 3.3K bytes
    - Viewed (0)
  7. tensorflow/compiler/mlir/lite/quantization/device_target.cc

      // input multipliers
      input_multipliers->append(3, kUnitQuantizedMultiplier);
    
      // output multipliers
      double real_multiplier = scale_product / o_spec.getScale();
      output_multipliers->push_back(QuantizeMultiplier(real_multiplier));
    
      // output ranges
      auto min = rop->getAttrOfType<FloatAttr>("min");
      auto max = rop->getAttrOfType<FloatAttr>("max");
      output_ranges->push_back(CalculateQuantizedRange(
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Fri Mar 08 10:41:08 UTC 2024
    - 7.3K bytes
    - Viewed (0)
Back to top