Collective - Code Search

tensorflow/c/eager/parallel_device/parallel_device_lib_test.cc

  std::unique_ptr<ParallelTensor> run_collective =
      parallel_device.ScalarsFromSequence<bool>({true, true}, context.get(),
                                                status.get());
  ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get());
  auto outputs = parallel_device.Execute(
      context.get(), {reduced_values.get(), run_collective.get()},
      "AssertAndCollective", TFE_OpGetAttrs(call_op.get()),

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Thu Jul 08 23:47:35 GMT 2021

- 15.3K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

tensorflow/c/c_api_experimental.h

                                                  TF_Status* status);

// Checks the health of collective ops peers. An explicit health check is needed
// in multi-worker collective ops to detect failures in the cluster. If a peer
// is down, collective ops may hang.
TF_CAPI_EXPORT extern void TFE_CollectiveOpsCheckPeerHealth(
    TFE_Context* ctx, const char* task, int64_t timeout_in_ms,
    TF_Status* status);

C

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Thu Apr 27 21:07:00 GMT 2023

- 15.1K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

tensorflow/c/c_api_experimental.cc

                                                  TF_Status* status) {
  tensorflow::EagerContext* context =
      tensorflow::ContextFromInterface(tensorflow::unwrap(ctx));
  auto collective_executor_handle = context->GetCollectiveExecutorHandle();
  collective_executor_handle->get()->StartAbort(status->status);
}

TF_CAPI_EXPORT extern void TFE_CollectiveOpsCheckPeerHealth(
    TFE_Context* ctx, const char* task, int64_t timeout_in_ms,

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Mon Apr 15 03:35:10 GMT 2024

- 29.4K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

tensorflow/c/eager/parallel_device/parallel_device_lib.h

  // less synchronization than a thread pool would for this task, since Execute
  // acquires each thread in order (and so only one Execute will schedule
  // blocking collective operations at a time), and avoids some dynamic
  // allocation/scheduling.
  //
  // TODO(allenl): Keep a map from outer thread to list of inner threads rather

C

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Tue Apr 25 15:21:13 GMT 2023

- 12.9K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

tensorflow/c/eager/parallel_device/parallel_device.cc

        // just copy-off but includes a sum) and consideration of performance.
        //
        // TODO(allenl): There may be smarter ways to do this copy in some
        // cases, i.e. with a collective broadcast. We'll need to be careful
        // about things that are taken as inputs on the host or on their
        // existing device (for multi-device functions).
        std::unique_ptr<ParallelTensor> parallel_tensor(

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Wed Mar 29 22:05:31 GMT 2023

- 18.3K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

tensorflow/c/eager/parallel_device/parallel_device_test.cc

  TensorHandlePtr parallel_value = CreatePerDeviceValues(
      context.get(), components, device_name, status.get());
  ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get());

  // Run a collective sum, so each component should now be the same.
  TensorHandlePtr reduced(
      CollectiveSum(context.get(), parallel_value.get(), 2, status.get()));
  ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get());

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Thu Jul 08 23:47:35 GMT 2021

- 29.3K bytes

- Viewed (1)

github.com/tensorflow/tensorflow

tensorflow/c/eager/parallel_device/parallel_device_lib.cc

                        const int in_flight_nodes_limit)
      : status_(TF_NewStatus()),
        // If the context's default executor is set to async, re-using that in
        // each thread would cause collectives to deadlock. For consistency we
        // create a new sync executor for every thread.
        //
        // TODO(allenl): We should have an async API that works with the
        // parallel device.

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Fri Feb 09 07:47:20 GMT 2024

- 25.4K bytes

- Viewed (1)

github.com/tensorflow/tensorflow

tensorflow/c/eager/c_api_test_util.cc

    job_def->mutable_tasks()->insert(
        {i, tensorflow::strings::StrCat("localhost:", port)});
  }
  auto* config = server_def.mutable_default_session_config();
  config->mutable_experimental()->set_collective_group_leader(
      tensorflow::strings::StrCat("/job:", job_name, "/replica:0/task:", 0));
  auto* rewrite_options =
      config->mutable_graph_options()->mutable_rewrite_options();

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Wed Feb 21 22:37:46 GMT 2024

- 23.5K bytes

- Viewed (2)

github.com/tensorflow/tensorflow

tensorflow/c/eager/c_api.cc

          opts->device_placement_policy),
      opts->async, device_mgr.release(),
      /*device_mgr_owned*/ true, std::move(r),
      /*cluster_flr=*/nullptr,
      /*collective_executor_mgr=*/nullptr,
      /*run_eager_op_as_function=*/opts->run_eager_op_as_function,
      /*jit_compile_rewrite=*/opts->jit_compile_rewrite);
#if !defined(IS_MOBILE_PLATFORM)
  eager_context->SetDistributedManager(

C++

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Tue Mar 12 20:00:09 GMT 2024

- 43.9K bytes

- Viewed (2)

github.com/tensorflow/tensorflow

RELEASE.md

            specifying a `delete_key` and `empty_key` that cannot be inserted
            into the table.
    *   Added support for specifying number of subdivisions in all reduce host
        collective. This parallelizes work on CPU and speeds up the collective
        performance. Default behavior is unchanged.
    *   Add an option `perturb_singular` to `tf.linalg.tridiagonal_solve` that

Plain Text

- Registered: Tue Apr 30 12:39:09 GMT 2024

- Last Modified: Mon Apr 29 19:17:57 GMT 2024

- 727.7K bytes

- Viewed (8)

Search Options