Initialize DGL's distributed module. This function initializes DGL's distributed module. It acts differently in server or client modes. In the server mode, it runs the server code and never returns. In the client mode, it builds connections with servers for communication and creates worker processes for distributed sampling.
(
ip_config,
max_queue_size=MAX_QUEUE_SIZE,
net_type=None,
num_worker_threads=1,
use_graphbolt=False,
)
| 206 | |
| 207 | |
| 208 | def initialize( |
| 209 | ip_config, |
| 210 | max_queue_size=MAX_QUEUE_SIZE, |
| 211 | net_type=None, |
| 212 | num_worker_threads=1, |
| 213 | use_graphbolt=False, |
| 214 | ): |
| 215 | """Initialize DGL's distributed module |
| 216 | |
| 217 | This function initializes DGL's distributed module. It acts differently in server |
| 218 | or client modes. In the server mode, it runs the server code and never returns. |
| 219 | In the client mode, it builds connections with servers for communication and |
| 220 | creates worker processes for distributed sampling. |
| 221 | |
| 222 | Parameters |
| 223 | ---------- |
| 224 | ip_config: str |
| 225 | File path of ip_config file |
| 226 | max_queue_size : int |
| 227 | Maximal size (bytes) of client queue buffer (~20 GB on default). |
| 228 | |
| 229 | Note that the 20 GB is just an upper-bound and DGL uses zero-copy and |
| 230 | it will not allocate 20GB memory at once. |
| 231 | net_type : str, optional |
| 232 | [Deprecated] Networking type, can be 'socket' only. |
| 233 | num_worker_threads: int |
| 234 | The number of OMP threads in each sampler process. |
| 235 | use_graphbolt: bool, optional |
| 236 | Whether to use GraphBolt for distributed training. |
| 237 | |
| 238 | Note |
| 239 | ---- |
| 240 | Users have to invoke this API before any DGL's distributed API and framework-specific |
| 241 | distributed API. For example, when used with PyTorch, users have to invoke this function |
| 242 | before PyTorch's `torch.distributed.init_process_group`. |
| 243 | """ |
| 244 | print( |
| 245 | f"Initialize the distributed services with graphbolt: {use_graphbolt}" |
| 246 | ) |
| 247 | if net_type is not None: |
| 248 | dgl_warning( |
| 249 | "net_type is deprecated and will be removed in future release." |
| 250 | ) |
| 251 | if os.environ.get("DGL_ROLE", "client") == "server": |
| 252 | from .dist_graph import DistGraphServer |
| 253 | |
| 254 | assert ( |
| 255 | os.environ.get("DGL_SERVER_ID") is not None |
| 256 | ), "Please define DGL_SERVER_ID to run DistGraph server" |
| 257 | assert ( |
| 258 | os.environ.get("DGL_IP_CONFIG") is not None |
| 259 | ), "Please define DGL_IP_CONFIG to run DistGraph server" |
| 260 | assert ( |
| 261 | os.environ.get("DGL_NUM_SERVER") is not None |
| 262 | ), "Please define DGL_NUM_SERVER to run DistGraph server" |
| 263 | assert ( |
| 264 | os.environ.get("DGL_NUM_CLIENT") is not None |
| 265 | ), "Please define DGL_NUM_CLIENT to run DistGraph server" |
nothing calls this directly
no test coverage detected