Manipulating Time Series Data in Python
Stefan Jansen
Founder & Lead Data Scientist at Applied Artificial Intelligence
# Stray slide text that was fused onto this line in the extract: ".cumprod()"
from numpy.random import normal, seed
from scipy.stats import norm

# Seed the NumPy RNG so the simulated draws are reproducible.
seed(42)
# 1,000 draws from a normal distribution with mean 0 and std. dev. 0.01.
random_returns = normal(loc=0, scale=0.01, size=1000)
# Histogram of the draws with a fitted normal density, KDE curve disabled.
# NOTE(review): `sns` is not imported in the visible text; presumably
# `import seaborn as sns` -- confirm. If so, `distplot` is deprecated in
# recent seaborn releases.
sns.distplot(random_returns, fit=norm, kde=False)

# Wrap the simulated draws in a Series so pandas arithmetic methods apply.
return_series = pd.Series(random_returns)
# Compound (1 + r) cumulatively, then subtract 1 to get the cumulative return.
random_prices = return_series.add(1).cumprod().sub(1)
# Scale by 100 (percent) and plot the resulting path.
random_prices.mul(100).plot()

# Load the price file, parsing the 'date' column and using it as the index.
data = pd.read_csv('sp500.csv', parse_dates=['date'], index_col='date')
# Period-over-period percentage change of the SP500 column (first row is NaN).
data['returns'] = data.SP500.pct_change()
# One subplot per column.
data.plot(subplots=True)

# Distribution of the non-missing returns, scaled by 100, with a fitted
# normal density overlaid.
sns.distplot(data['returns'].dropna() * 100, fit=norm)

from numpy.random import choice

# Observed returns with the missing values removed.
sample = data.returns.dropna()
# count() ignores NaN, so this equals the number of rows in `sample`.
n_obs = data.returns.count()
# Randomly pick n_obs values from the observed returns
# (numpy's `choice` samples with replacement by default).
random_walk = choice(sample, size=n_obs)
# Re-attach the index of the observed returns to the resampled values.
random_walk = pd.Series(random_walk, index=sample.index)
random_walk.head()
DATE
2007-05-29 -0.008357
2007-05-30 0.003702
2007-05-31 -0.013990
2007-06-01 0.008096
2007-06-04 0.013120
# Keep the first SP500 observation as a one-row Series (index preserved).
# `.iloc[:1]` replaces `Series.first('D')`, which is deprecated in current
# pandas; for one row per date the selection is the same.
start = data.SP500.iloc[:1]
DATE
2007-05-25 1515.73
Name: SP500, dtype: float64
# Put the starting value in front of the (1 + return) factors.
# `pd.concat` replaces `Series.append`, which was removed in pandas 2.0.
sp500_random = pd.concat([start, random_walk.add(1)])
sp500_random.head()
DATE
2007-05-25 1515.730000
2007-05-29 0.998290
2007-05-30 0.995190
2007-05-31 0.997787
2007-06-01 0.983853
dtype: float64
# Cumulative product of the series; assigning it as a column aligns it to
# the index of `data`.
data['SP500_random'] = sp500_random.cumprod()
# Plot the SP500 and SP500_random columns together.
data[['SP500', 'SP500_random']].plot()

Manipulating Time Series Data in Python