Create timeseries dataframe with random data

Parameters
----------
start: datetime (or datetime-like string)
    Start of time series
end: datetime (or datetime-like string)
    End of time series
dtypes: dict (optional)
    Mapping of column names to types.
(
start="2000-01-01",
end="2000-01-31",
freq="1s",
partition_freq="1D",
dtypes=None,
seed=None,
**kwargs,
)
| 184 | |
| 185 | |
| 186 | def timeseries( |
| 187 | start="2000-01-01", |
| 188 | end="2000-01-31", |
| 189 | freq="1s", |
| 190 | partition_freq="1D", |
| 191 | dtypes=None, |
| 192 | seed=None, |
| 193 | **kwargs, |
| 194 | ): |
| 195 | """Create timeseries dataframe with random data |
| 196 | |
| 197 | Parameters |
| 198 | ---------- |
| 199 | start: datetime (or datetime-like string) |
| 200 | Start of time series |
| 201 | end: datetime (or datetime-like string) |
| 202 | End of time series |
| 203 | dtypes: dict (optional) |
| 204 | Mapping of column names to types. |
| 205 | Valid types include {float, int, str, 'category'} |
| 206 | freq: string |
| 207 | String like '2s' or '1H' or '12W' for the time series frequency |
| 208 | partition_freq: string |
| 209 | String like '1M' or '2Y' to divide the dataframe into partitions |
| 210 | seed: int (optional) |
| 211 | Randomstate seed |
| 212 | kwargs: |
| 213 | Keywords to pass down to individual column creation functions. |
| 214 | Keywords should be prefixed by the column name and then an underscore. |
| 215 | |
| 216 | Examples |
| 217 | -------- |
| 218 | >>> from dask.dataframe.dask_expr.datasets import timeseries |
| 219 | >>> df = timeseries( |
| 220 | ... start='2000', end='2010', |
| 221 | ... dtypes={'value': float, 'name': str, 'id': int}, |
| 222 | ... freq='2h', partition_freq='1D', seed=1 |
| 223 | ... ) |
| 224 | >>> df.head() # doctest: +SKIP |
| 225 | id name value |
| 226 | 2000-01-01 00:00:00 969 Jerry -0.309014 |
| 227 | 2000-01-01 02:00:00 1010 Ray -0.760675 |
| 228 | 2000-01-01 04:00:00 1016 Patricia -0.063261 |
| 229 | 2000-01-01 06:00:00 960 Charlie 0.788245 |
| 230 | 2000-01-01 08:00:00 1031 Kevin 0.466002 |
| 231 | """ |
| 232 | if dtypes is None: |
| 233 | dtypes = {"name": "string", "id": int, "x": float, "y": float} |
| 234 | |
| 235 | if seed is None: |
| 236 | seed = np.random.randint(2e9) |
| 237 | |
| 238 | expr = Timeseries( |
| 239 | start, |
| 240 | end, |
| 241 | dtypes, |
| 242 | freq, |
| 243 | partition_freq, |
searching dependent graphs…