Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 10 of 12 for XlaLaunch (0.13 sec)

  1. tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc

    // the arguments into the order expected by XlaLaunch computations:
    // 1) arguments
    // 2) resource variable arguments
    // See the documentation of EncapsulateSubgraphsInFunctions for the meaning
    // of the arguments.
    //
    // TODO(b/113166435): Ordering constraints on XlaLaunch op can be relaxed.
    Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Mar 12 06:33:33 UTC 2024
    - 15.1K bytes
    - Viewed (0)
  2. tensorflow/compiler/mlir/tensorflow/transforms/tf_device_passes.td

        This pass rewrites `tf.PartitionedCall` and `tf.StatefulPartitionedCall`
        operations with `_xla_compile_device_type` attribute in a
        `tf_device.cluster` into `tf.XlaLaunch` operations. This makes the attached
        function execute with XLA. `tf.XlaLaunch` requires resource-type arguments
        come at the end, so this pass rewrites the called function if necessary.
        This pass assumes there are no nested `tf_device.cluster`s so we don't end
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Wed Apr 17 18:52:57 UTC 2024
    - 12.5K bytes
    - Viewed (0)
  3. tensorflow/compiler/jit/xla_device_ops.h

    namespace tensorflow {
    
    // Dummy OpKernel, used for kernels assigned to an XLA device that should be
    // compiled. Should never be called at runtime since such ops should be
    // rewritten to an XlaLaunch op. If it is called, it means the placer placed an
    // operator on an XLA device but the compiler did not compile it.
    class XlaDeviceDummyOp : public OpKernel {
     public:
      explicit XlaDeviceDummyOp(OpKernelConstruction* ctx);
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Nov 23 19:28:25 UTC 2021
    - 17.1K bytes
    - Viewed (0)
  4. tensorflow/compiler/jit/flags.h

       public:
        // Allow using Device API (PjRt) for `device_type` in the XlaLaunch op.
        // Please note that `enabled_for_xla_launch_` needs to be true in addition
        // to the `device_type` being allowed in order to use the Device API for
        // single device compilation and execution in the XlaLaunch op.
        void AllowForDeviceInXlaLaunch(const DeviceType& device_type) {
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Wed Apr 17 18:52:57 UTC 2024
    - 14.5K bytes
    - Viewed (0)
  5. tensorflow/compiler/jit/kernels/xla_ops.cc

      if (ctx->has_input(i) || ctx->has_input(++i)) {
        ctx->set_output(0, ctx->input(i));
      }
    }
    
    REGISTER_KERNEL_BUILDER(Name("XlaLaunch").Device(DEVICE_CPU), XlaLocalLaunchOp);
    
    REGISTER_KERNEL_BUILDER(Name("XlaLaunchV2").Device(DEVICE_CPU), XlaLaunchV2Op);
    
    REGISTER_KERNEL_BUILDER(Name("XlaLaunch")
                                .Device(DEVICE_GPU)
                                .HostMemory("constants")
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Fri May 17 22:46:36 UTC 2024
    - 41.4K bytes
    - Viewed (0)
  6. tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir

      %unused = "tf.TestAsyncIdentity"(%x) {__op_key = 0: i32, T = i32} : (tensor<i32>) -> tensor<i32>
      // CHECK: mlrt.await_all_control [[unused]]
      return %x : tensor<i32>
    }
    
    // -----
    
    // Test for XlaLaunch
    
    func.func private @xla_func_0(%arg0: tensor<1x3xf32>, %arg1: tensor<1x3xf32>) -> tensor<1x3xf32> attributes {tf._XlaMustCompile = true, tf._noinline = true, tf._original_func_name = "should_not_be_used"} {
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Fri May 31 20:44:15 UTC 2024
    - 24.7K bytes
    - Viewed (0)
  7. tensorflow/compiler/jit/xla_launch_util.h

                                  int device_ordinal, bool allocate_xla_tensors,
                                  bool use_multiple_streams);
    
      // Builds a XlaCompiler::Argument vector from the arguments to an XlaLaunch
      // op.
      // Precondition: variables in `variable_args` are locked.
      static absl::StatusOr<std::vector<XlaCompiler::Argument>>
      BuildXlaCompilerArguments(absl::Span<int const> must_be_constant_idxs,
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Wed Feb 21 09:53:30 UTC 2024
    - 11.8K bytes
    - Viewed (0)
  8. tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc

      auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
      auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
    
      NameAttrList function;
      function.set_name("launch0");
      auto launch = ops::XlaLaunch(
          scope.WithOpName("launch0").WithDevice("/gpu:0"),
          std::initializer_list<Input>{}, std::initializer_list<Input>{a, b, c, d},
          std::initializer_list<Input>{u, v, w},
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Mon Oct 16 18:03:15 UTC 2023
    - 14.7K bytes
    - Viewed (0)
  9. tensorflow/compiler/jit/compilability_check_util.h

        const tensorflow::FunctionBody* fbody,
        absl::Span<int const> constant_arg_indices,
        absl::Span<int const> resource_arg_indices);
    
    // Returns output memory types.
    //
    // XlaLaunch kernel keeps all outputs (including constants, which it copies),
    // in device memory except for resources.
    tensorflow::MemoryTypeVector GetOutputMemoryTypes(
        const tensorflow::FunctionBody* fbody);
    
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Wed Sep 06 19:12:29 UTC 2023
    - 14.9K bytes
    - Viewed (0)
  10. tensorflow/compiler/mlir/tf2xla/internal/passes/clustering_passes.td

        with `_xla_compile_device_type` attribute into a `tf_device.cluster`.
        Notice this pass will only rewrite the outermost call if there are nested
        calls to avoid nested `tf.XlaLaunch` operations from being created later.
    
        For example, the following code
    
        ```mlir
        func.func @main() -> tensor<i32> {
    Registered: Sun Jun 16 05:45:23 UTC 2024
    - Last Modified: Tue Apr 30 02:01:13 UTC 2024
    - 19.8K bytes
    - Viewed (0)
Back to top