From b3b3d6baa2a8b9f0f3da989844b61915588cda76 Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 11:11:57 +0330 Subject: [PATCH 1/8] Add Tehran Stock Exchange source --- docs/source/readers/index.rst | 1 + docs/source/readers/tse.rst | 7 ++ docs/source/remote_data.rst | 21 ++++ pandas_datareader/__init__.py | 2 + pandas_datareader/data.py | 19 ++++ pandas_datareader/tse.py | 183 ++++++++++++++++++++++++++++++++++ 6 files changed, 233 insertions(+) create mode 100644 docs/source/readers/tse.rst create mode 100644 pandas_datareader/tse.py diff --git a/docs/source/readers/index.rst b/docs/source/readers/index.rst index 310f4175..cb30a714 100644 --- a/docs/source/readers/index.rst +++ b/docs/source/readers/index.rst @@ -22,3 +22,4 @@ Data Readers tsp world-bank yahoo + tse diff --git a/docs/source/readers/tse.rst b/docs/source/readers/tse.rst new file mode 100644 index 00000000..40ff8289 --- /dev/null +++ b/docs/source/readers/tse.rst @@ -0,0 +1,7 @@ +Tehran Stock Exchange +------------------------------------ + +.. py:module:: pandas_datareader.tse +.. autoclass:: TSEReader + :members: + :inherited-members: read diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst index 70793d62..83cb0056 100644 --- a/docs/source/remote_data.rst +++ b/docs/source/remote_data.rst @@ -44,6 +44,7 @@ Currently the following sources are supported: - :ref:`Tiingo` - :ref:`World Bank` - :ref:`Yahoo Finance` + - :ref:`Tehran Stock Exchange` It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ. @@ -762,3 +763,23 @@ The following endpoints are available: dividends = web.DataReader('IBM', 'yahoo-dividends', start, end) dividends.head() + +.. _remote_data.tse: + +Tehran Stock Exchange +===================== +An interface to structure the information provided by +`Tehran Stock Exchange `_ + +.. 
ipython:: python + + import pandas_datareader.data as web + from datetime import datetime + start = datetime(2021, 1, 1) + end = datetime.today() + f = web.DataReader("نوری", "tse", start, end) + f.head() + + # Multiple series: + multi = web.DataReader(["نوری", "برکت"], "tse", start, end) + multi.head() \ No newline at end of file diff --git a/pandas_datareader/__init__.py b/pandas_datareader/__init__.py index a792a806..69c61016 100644 --- a/pandas_datareader/__init__.py +++ b/pandas_datareader/__init__.py @@ -15,6 +15,7 @@ get_data_quandl, get_data_stooq, get_data_tiingo, + get_data_tse, get_data_yahoo, get_data_yahoo_actions, get_iex_book, @@ -40,6 +41,7 @@ "get_components_yahoo", "get_data_enigma", "get_data_famafrench", + "get_data_tse", "get_data_yahoo", "get_data_yahoo_actions", "get_quote_yahoo", diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index c2d6223a..e9b66673 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -33,6 +33,7 @@ TiingoIEXHistoricalReader, TiingoQuoteReader, ) +from pandas_datareader.tse import TSEReader from pandas_datareader.yahoo.actions import YahooActionReader, YahooDivReader from pandas_datareader.yahoo.components import _get_data as get_components_yahoo from pandas_datareader.yahoo.daily import YahooDailyReader @@ -46,6 +47,7 @@ "get_data_fred", "get_data_moex", "get_data_quandl", + "get_data_tse", "get_data_yahoo", "get_data_yahoo_actions", "get_nasdaq_symbols", @@ -270,6 +272,10 @@ def get_iex_book(*args, **kwargs): return IEXDeep(*args, **kwargs).read() +def get_data_tse(*args, **kwargs): + return TSEReader(*args, **kwargs).read() + + @deprecate_kwarg("access_key", "api_key") def DataReader( name, @@ -360,6 +366,7 @@ def DataReader( "av-intraday", "econdb", "naver", + "tse", ] if data_source not in expected_source: @@ -668,6 +675,18 @@ def DataReader( session=session, ).read() + elif data_source == "tse": + return TSEReader( + symbols=name, + start=start, + end=end, 
retry_count=retry_count, + pause=pause, + session=session, + adjust_price=False, + interval="d", + ).read() + else: msg = "data_source=%r is not implemented" % data_source raise NotImplementedError(msg) diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py new file mode 100644 index 00000000..5f7d3a11 --- /dev/null +++ b/pandas_datareader/tse.py @@ -0,0 +1,183 @@ +import pandas as pd + +from pandas_datareader.base import _BaseReader +from pandas_datareader.compat import is_list_like +from pandas_datareader._utils import RemoteDataError, SymbolWarning + + +class TSEReader(_BaseReader): + """ + Tehran stock exchange daily data + + Returns DataFrame of historical data from the Tehran Stock Exchange + open data service, over date range, start to end. + + Parameters + ---------- + symbols : {int, str, List[str], List[int]} + The symbols can be persian symbol code or instrument id. + This argument can be obtained from tsetmc.com site. + start : string, int, date, datetime, Timestamp + Starting date. Parses many different kind of date + default value is 5 years ago + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') + end : string, int, date, datetime, Timestamp + Ending date + retry_count : int, default 3 + Number of times to retry query request. + pause : float, default 0.1 + Time, in seconds, of the pause between retries. + session : Session, default None + requests.sessions.Session instance to be used. + adjust_price : bool, default False + If True, adjusts all prices in hist_data ('Open', 'High', 'Low', + 'Close') based on 'Adj Close' nad 'Yesterday' price. 
+ interval: string, d, w, m for daily, weekly, monthly + """ + + def __init__( + self, + symbols=None, + start=None, + end=None, + retry_count=3, + pause=0.1, + session=None, + adjust_price=False, + interval="d", + ): + super().__init__( + symbols=symbols, + start=start, + end=end, + retry_count=retry_count, + pause=pause, + session=session, + ) + + # Ladder up the wait time between subsequent requests to improve + # probability of a successful retry + self.pause_multiplier = 2.5 + + self.adjust_price = adjust_price + self.interval = interval + + if self.interval not in ["d", "w", "m"]: + raise ValueError( + "Invalid interval: valid values are 'd', 'w' and 'm'. " + ) + + @property + def url(self): + """API URL""" + + return ("http://www.tsetmc.com/tsev2/data/" + "Export-txt.aspx?t=i&a=1&b=0&i={}") + + def read(self): + """ + Read data from connector + """ + try: + return self._read() + finally: + self.close() + + def _read(self): + """ + read data from many URLs if necessary and + joins into one DataFrame + """ + indexes = self._symbol_search_request(self.symbols) + + urls = [self.url.format(indexes[n]) for n in indexes] + + def _req(url, n): + return self._read_single_request(n, url, self.params) + + dfs = {n: _req(url, n) for url, n in zip(urls, indexes)} + + return dfs + + def _read_single_request(self, symbol, url, params): + """read one data from specified URL""" + + out = self._read_url_as_StringIO(url, params=None) + try: + df = pd.read_csv(out) + except ValueError: + out.seek(0) + msg = out.read() + raise RemoteDataError( + "message: {}, symbol: {}".format(msg, symbol) + ) from None + + df = df.iloc[::-1] + HISTORY_FIELD_MAPPINGS = { + "": "date", + "": "open", + "": "high", + "": "low", + "": "close", + "": "volume", + "": "value", + "": "count", + "": "adjClose", + "": "yesterday", + } + df = df.rename(columns=HISTORY_FIELD_MAPPINGS) + df = df.reindex(HISTORY_FIELD_MAPPINGS.values(), axis=1) + + if "date" in df: + df["date"] = pd.to_datetime(df["date"], 
format="%Y%m%d") + df = df.set_index("date") + df = df[self.start:self.end] + return df + + def _symbol_search_request(self, symbols): + """read one data from specified URL""" + MARKET_WATCH_INIT_URL = ( + "http://www.tsetmc.com/tsev2/data/MarketWatchInit.aspx?h=0&r=0" + ) + if not is_list_like(symbols): + names = [symbols] + else: + names = symbols + out = self._read_url_as_StringIO(MARKET_WATCH_INIT_URL, params=None) + out.seek(0) + msg = out.read() + # response contain different groups for different data + response_groups = msg.split("@") + if len(response_groups) < 3: + raise RemoteDataError( + "response groups: {}, symbol: {}".format( + len(response_groups), + symbols + ) + ) from None + + symbols_data = response_groups[2].split(";") + + market_symbols = {} + for symbol_data in symbols_data: + data = symbol_data.split(",") + market_symbols[ + self._replace_arabic(data[2]).replace('\u200c', '') + ] = self._replace_arabic(data[0]) + + indexes = {} + for name in names: + try: + if name.isnumeric(): + indexes[name] = name + else: + indexes[name] = market_symbols[name] + except KeyError: + raise SymbolWarning( + "{} not found".format(name) + ) from None + + return indexes + + def _replace_arabic(self, string: str): + return string.replace('ك', 'ک').replace('ي', 'ی').strip() From ff644a2d706eae8c3cd87060f2807c1a978f0e6c Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 12:22:03 +0330 Subject: [PATCH 2/8] use DailyBaseReader --- pandas_datareader/data.py | 1 + pandas_datareader/tse.py | 169 ++++++++++++++++++-------------------- 2 files changed, 80 insertions(+), 90 deletions(-) diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index e9b66673..d4d4fcf9 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -684,6 +684,7 @@ def DataReader( pause=pause, session=session, adjust_price=False, + chunksize=10, interval="d", ).read() diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py index 5f7d3a11..3b135010 100644 
--- a/pandas_datareader/tse.py +++ b/pandas_datareader/tse.py @@ -1,11 +1,28 @@ import pandas as pd -from pandas_datareader.base import _BaseReader -from pandas_datareader.compat import is_list_like +from pandas_datareader.base import _DailyBaseReader from pandas_datareader._utils import RemoteDataError, SymbolWarning - -class TSEReader(_BaseReader): +_TSE_TICKER_URL = "http://www.tsetmc.com/tsev2/data/Export-txt.aspx" +_TSE_MARKET_WATCH_INIT_URL = ( + "http://www.tsetmc.com/tsev2/data/MarketWatchInit.aspx?h=0&r=0" +) +_TSE_FIELD_MAPPINGS = { + "": "Date", + "": "Open", + "": "High", + "": "Low", + "": "Close", + "": "Volume", + "": "Value", + "": "Count", + "": "AdjClose", + "": "Yesterday", +} +_tse_ticker_cache = None + + +class TSEReader(_DailyBaseReader): """ Tehran stock exchange daily data @@ -44,6 +61,7 @@ def __init__( pause=0.1, session=None, adjust_price=False, + chunksize=1, interval="d", ): super().__init__( @@ -53,6 +71,7 @@ def __init__( retry_count=retry_count, pause=pause, session=session, + chunksize=chunksize, ) # Ladder up the wait time between subsequent requests to improve @@ -70,114 +89,84 @@ def __init__( @property def url(self): """API URL""" + return (_TSE_TICKER_URL) - return ("http://www.tsetmc.com/tsev2/data/" - "Export-txt.aspx?t=i&a=1&b=0&i={}") - - def read(self): - """ - Read data from connector - """ - try: - return self._read() - finally: - self.close() - - def _read(self): - """ - read data from many URLs if necessary and - joins into one DataFrame - """ - indexes = self._symbol_search_request(self.symbols) - - urls = [self.url.format(indexes[n]) for n in indexes] + def _get_params(self, symbol): + # This needed because yahoo returns data shifted by 4 hours ago. 
+ index = self._symbol_search_request(symbol) - def _req(url, n): - return self._read_single_request(n, url, self.params) - - dfs = {n: _req(url, n) for url, n in zip(urls, indexes)} - - return dfs + params = { + "t": "i", + "a": 1, + "b": 0, + "i": index, + } + return params - def _read_single_request(self, symbol, url, params): + def _read_one_data(self, url, params): """read one data from specified URL""" - out = self._read_url_as_StringIO(url, params=None) + out = self._read_url_as_StringIO(url, params) try: df = pd.read_csv(out) except ValueError: out.seek(0) msg = out.read() raise RemoteDataError( - "message: {}, symbol: {}".format(msg, symbol) + "message: {}, symbol: {}".format(msg, params.i) ) from None df = df.iloc[::-1] - HISTORY_FIELD_MAPPINGS = { - "": "date", - "": "open", - "": "high", - "": "low", - "": "close", - "": "volume", - "": "value", - "": "count", - "": "adjClose", - "": "yesterday", - } - df = df.rename(columns=HISTORY_FIELD_MAPPINGS) - df = df.reindex(HISTORY_FIELD_MAPPINGS.values(), axis=1) + df = df.rename(columns=_TSE_FIELD_MAPPINGS) + df = df.reindex(_TSE_FIELD_MAPPINGS.values(), axis=1) - if "date" in df: - df["date"] = pd.to_datetime(df["date"], format="%Y%m%d") - df = df.set_index("date") + if "Date" in df: + df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d") + df = df.set_index("Date") df = df[self.start:self.end] return df - def _symbol_search_request(self, symbols): + def _symbol_search_request(self, symbol): """read one data from specified URL""" - MARKET_WATCH_INIT_URL = ( - "http://www.tsetmc.com/tsev2/data/MarketWatchInit.aspx?h=0&r=0" - ) - if not is_list_like(symbols): - names = [symbols] - else: - names = symbols - out = self._read_url_as_StringIO(MARKET_WATCH_INIT_URL, params=None) - out.seek(0) - msg = out.read() - # response contain different groups for different data - response_groups = msg.split("@") - if len(response_groups) < 3: - raise RemoteDataError( - "response groups: {}, symbol: {}".format( - 
len(response_groups), - symbols - ) - ) from None + global _tse_ticker_cache - symbols_data = response_groups[2].split(";") - - market_symbols = {} - for symbol_data in symbols_data: - data = symbol_data.split(",") - market_symbols[ - self._replace_arabic(data[2]).replace('\u200c', '') - ] = self._replace_arabic(data[0]) - - indexes = {} - for name in names: - try: - if name.isnumeric(): - indexes[name] = name - else: - indexes[name] = market_symbols[name] - except KeyError: - raise SymbolWarning( - "{} not found".format(name) + if _tse_ticker_cache is None: + out = self._read_url_as_StringIO( + _TSE_MARKET_WATCH_INIT_URL, + params=None + ) + out.seek(0) + msg = out.read() + # response contain different groups for different data + response_groups = msg.split("@") + if len(response_groups) < 3: + raise RemoteDataError( + "response groups: {}, symbol: {}".format( + len(response_groups), + symbol + ) ) from None - return indexes + symbols_data = response_groups[2].split(";") + + _tse_ticker_cache = {} + for symbol_data in symbols_data: + data = symbol_data.split(",") + _tse_ticker_cache[ + self._replace_arabic(data[2]).replace('\u200c', '') + ] = self._replace_arabic(data[0]) + + try: + if symbol.isnumeric(): + index = symbol + else: + index = _tse_ticker_cache[symbol] + except KeyError: + raise SymbolWarning( + "{} not found".format(symbol) + ) from None + + return index def _replace_arabic(self, string: str): return string.replace('ك', 'ک').replace('ي', 'ی').strip() From 4c55428ba42b9b533e0b959e91d5c31cb5dae2fc Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 12:47:12 +0330 Subject: [PATCH 3/8] add adjust price option --- pandas_datareader/tse.py | 46 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py index 3b135010..9551a807 100644 --- a/pandas_datareader/tse.py +++ b/pandas_datareader/tse.py @@ -48,7 +48,7 @@ class TSEReader(_DailyBaseReader): 
requests.sessions.Session instance to be used. adjust_price : bool, default False If True, adjusts all prices in hist_data ('Open', 'High', 'Low', - 'Close') based on 'Adj Close' nad 'Yesterday' price. + 'Close') based on 'Adj Close' and 'Yesterday' price. interval: string, d, w, m for daily, weekly, monthly """ @@ -116,10 +116,13 @@ def _read_one_data(self, url, params): "message: {}, symbol: {}".format(msg, params.i) ) from None - df = df.iloc[::-1] + df = df.iloc[::-1].reset_index(drop=True) df = df.rename(columns=_TSE_FIELD_MAPPINGS) df = df.reindex(_TSE_FIELD_MAPPINGS.values(), axis=1) + if(self.adjust_price): + df = _adjust_prices(df) + if "Date" in df: df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d") df = df.set_index("Date") @@ -170,3 +173,42 @@ def _symbol_search_request(self, symbol): def _replace_arabic(self, string: str): return string.replace('ك', 'ک').replace('ي', 'ی').strip() + + +def _adjust_prices(hist_data, price_list=None): + """ + Return modifed DataFrame with adjusted prices based on + 'Adj Close' and 'Yesterday' price + """ + if hist_data.empty: + return hist_data + if not isinstance(hist_data.index, pd.core.indexes.range.RangeIndex): + raise TypeError( + "Error in adjusting price; index type must be RangeIndex" + ) from None + if price_list is None: + price_list = ["Open", "High", "Low", "Close", "AdjClose", "Yesterday"] + + data = hist_data.copy() + step = data.index.step + diff = list(data.index[data.shift(1).AdjClose != data.Yesterday]) + if len(diff) > 0: + diff.pop(0) + ratio = 1 + ratio_list = [] + for i in diff[::-1]: + ratio *= ( + data.loc[i, 'Yesterday'] / data.shift(1).loc[i, 'AdjClose'] + ) + ratio_list.insert(0, ratio) + for i, k in enumerate(diff): + if i == 0: + start = data.index.start + else: + start = diff[i-1] + end = diff[i]-step + data.loc[start:end, price_list] = round( + data.loc[start:end, price_list] * ratio_list[i] + ) + + return data From 8830d1c7cf8e17c52ee0b789fbc8a567237ed7b4 Mon Sep 17 00:00:00 2001 
From: alised Date: Wed, 22 Sep 2021 13:02:38 +0330 Subject: [PATCH 4/8] resample historical price weekly or monthly --- pandas_datareader/tse.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py index 9551a807..6d0103f8 100644 --- a/pandas_datareader/tse.py +++ b/pandas_datareader/tse.py @@ -127,6 +127,15 @@ def _read_one_data(self, url, params): df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d") df = df.set_index("Date") df = df[self.start:self.end] + if(self.interval == 'w'): + ohlc = df['Close'].resample('w-sat').ohlc() + ohlc['volume'] = df['Volume'].resample('w-sat').sum() + df = ohlc + elif self.interval == 'm': + ohlc = df['Close'].resample('m').ohlc() + ohlc['volume'] = df['Volume'].resample('m').sum() + df = ohlc + return df def _symbol_search_request(self, symbol): From a7999d816d958ab20c38f3d549820fd49cb3b61a Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 14:05:51 +0330 Subject: [PATCH 5/8] Create test for tse source --- pandas_datareader/tests/test_tse.py | 57 +++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 pandas_datareader/tests/test_tse.py diff --git a/pandas_datareader/tests/test_tse.py b/pandas_datareader/tests/test_tse.py new file mode 100644 index 00000000..cb092c25 --- /dev/null +++ b/pandas_datareader/tests/test_tse.py @@ -0,0 +1,57 @@ +from datetime import datetime + +import pandas as pd +import pytest + +from pandas_datareader import data as web + +pytestmark = pytest.mark.stable + + +class TestTSE(object): + @property + def start(self): + return datetime(2021, 3, 1) + + @property + def end(self): + return datetime(2021, 9, 15) + + def test_tse(self): + df = web.DataReader("نوری", "tse", self.start, self.end) + assert df.index.name == "Date" + assert df.index[0] == pd.to_datetime(self.start) + assert df.index[-1] == pd.to_datetime(self.end) + assert len(df) == 123 + + def test_tse_int_symbol(self): + df = 
web.DataReader("19040514831923530", "tse", self.start, self.end) + assert df.index.name == "Date" + assert df.index[0] == pd.to_datetime(self.start) + assert df.index[-1] == pd.to_datetime(self.end) + assert len(df) == 123 + + def test_tse_multi(self): + names = ["خصدرا", "زاگرس"] + df = web.DataReader(names, "tse", self.start, self.end) + assert df.index.name == "Date" + assert df.index[0] == pd.to_datetime(self.start) + assert df.index[-1] == pd.to_datetime(self.end) + assert list(df.columns.get_level_values(1)[0:len(names)]) == names + assert len(df) == 126 + + def test_tse_multi_bad_series(self): + names = ["NOTAREALSERIES", "نوری", "ALSOFAKE"] + with pytest.raises(Exception): + web.DataReader(names, data_source="tse") + + def test_tse_raises_exception(self): + with pytest.raises(Exception): + web.DataReader("NON EXISTENT SERIES", "tse", self.start, self.end) + + def test_tse_helper(self): + df = web.get_data_tse("نوری", self.start, self.end) + assert df.index.name == "Date" + assert df.index[0] == pd.to_datetime(self.start) + assert df.index[-1] == pd.to_datetime(self.end) + assert len(df) == 123 From ef0f235c67875a4422330f1454cd20ff59ecba6a Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 14:12:48 +0330 Subject: [PATCH 6/8] fix black syntax --- pandas_datareader/tests/test_tse.py | 2 +- pandas_datareader/tse.py | 46 +++++++++++------------------ 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/pandas_datareader/tests/test_tse.py b/pandas_datareader/tests/test_tse.py index cb092c25..92062ddd 100644 --- a/pandas_datareader/tests/test_tse.py +++ b/pandas_datareader/tests/test_tse.py @@ -37,7 +37,7 @@ def test_tse_multi(self): assert df.index.name == "Date" assert df.index[0] == pd.to_datetime(self.start) assert df.index[-1] == pd.to_datetime(self.end) - assert list(df.columns.get_level_values(1)[0:len(names)]) == names + assert list(df.columns.get_level_values(1)[0 : len(names)]) == names assert len(df) == 126 def 
test_tse_multi_bad_series(self): diff --git a/pandas_datareader/tse.py b/pandas_datareader/tse.py index 6d0103f8..88dfb078 100644 --- a/pandas_datareader/tse.py +++ b/pandas_datareader/tse.py @@ -82,14 +82,12 @@ def __init__( self.interval = interval if self.interval not in ["d", "w", "m"]: - raise ValueError( - "Invalid interval: valid values are 'd', 'w' and 'm'. " - ) + raise ValueError("Invalid interval: valid values are 'd', 'w' and 'm'. ") @property def url(self): """API URL""" - return (_TSE_TICKER_URL) + return _TSE_TICKER_URL def _get_params(self, symbol): # This needed because yahoo returns data shifted by 4 hours ago. @@ -120,20 +118,20 @@ def _read_one_data(self, url, params): df = df.rename(columns=_TSE_FIELD_MAPPINGS) df = df.reindex(_TSE_FIELD_MAPPINGS.values(), axis=1) - if(self.adjust_price): + if self.adjust_price: df = _adjust_prices(df) if "Date" in df: df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d") df = df.set_index("Date") - df = df[self.start:self.end] - if(self.interval == 'w'): - ohlc = df['Close'].resample('w-sat').ohlc() - ohlc['volume'] = df['Volume'].resample('w-sat').sum() + df = df[self.start : self.end] + if self.interval == "w": + ohlc = df["Close"].resample("w-sat").ohlc() + ohlc["volume"] = df["Volume"].resample("w-sat").sum() df = ohlc - elif self.interval == 'm': - ohlc = df['Close'].resample('m').ohlc() - ohlc['volume'] = df['Volume'].resample('m').sum() + elif self.interval == "m": + ohlc = df["Close"].resample("m").ohlc() + ohlc["volume"] = df["Volume"].resample("m").sum() df = ohlc return df @@ -143,10 +141,7 @@ def _symbol_search_request(self, symbol): global _tse_ticker_cache if _tse_ticker_cache is None: - out = self._read_url_as_StringIO( - _TSE_MARKET_WATCH_INIT_URL, - params=None - ) + out = self._read_url_as_StringIO(_TSE_MARKET_WATCH_INIT_URL, params=None) out.seek(0) msg = out.read() # response contain different groups for different data @@ -154,8 +149,7 @@ def _symbol_search_request(self, symbol): if 
len(response_groups) < 3: raise RemoteDataError( "response groups: {}, symbol: {}".format( - len(response_groups), - symbol + len(response_groups), symbol ) ) from None @@ -165,7 +159,7 @@ def _symbol_search_request(self, symbol): for symbol_data in symbols_data: data = symbol_data.split(",") _tse_ticker_cache[ - self._replace_arabic(data[2]).replace('\u200c', '') + self._replace_arabic(data[2]).replace("\u200c", "") ] = self._replace_arabic(data[0]) try: @@ -174,14 +168,12 @@ def _symbol_search_request(self, symbol): else: index = _tse_ticker_cache[symbol] except KeyError: - raise SymbolWarning( - "{} not found".format(symbol) - ) from None + raise SymbolWarning("{} not found".format(symbol)) from None return index def _replace_arabic(self, string: str): - return string.replace('ك', 'ک').replace('ي', 'ی').strip() + return string.replace("ك", "ک").replace("ي", "ی").strip() def _adjust_prices(hist_data, price_list=None): @@ -206,16 +198,14 @@ def _adjust_prices(hist_data, price_list=None): ratio = 1 ratio_list = [] for i in diff[::-1]: - ratio *= ( - data.loc[i, 'Yesterday'] / data.shift(1).loc[i, 'AdjClose'] - ) + ratio *= data.loc[i, "Yesterday"] / data.shift(1).loc[i, "AdjClose"] ratio_list.insert(0, ratio) for i, k in enumerate(diff): if i == 0: start = data.index.start else: - start = diff[i-1] - end = diff[i]-step + start = diff[i - 1] + end = diff[i] - step data.loc[start:end, price_list] = round( data.loc[start:end, price_list] * ratio_list[i] ) From 125853e04e702b823b0412a2826932292fa816f6 Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 15:52:29 +0330 Subject: [PATCH 7/8] Add description to get_data_tse function --- pandas_datareader/data.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index d4d4fcf9..4c8a6c45 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -273,6 +273,34 @@ def get_iex_book(*args, **kwargs): def 
get_data_tse(*args, **kwargs): + """ + Tehran stock exchange daily data + + Returns DataFrame of historical data from the Tehran Stock Exchange + open data service, over date range, start to end. + + Parameters + ---------- + symbols : {int, str, List[str], List[int]} + The symbols can be Persian symbol code or instrument id. + This argument can be obtained from tsetmc.com site. + start : string, int, date, datetime, Timestamp + Starting date. Parses many different kinds of date + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). + The default value is 5 years ago. + end : string, int, date, datetime, Timestamp + Ending date + retry_count : int, default 3 + Number of times to retry query request. + pause : float, default 0.1 + Time, in seconds, of the pause between retries. + session : Session, default None + requests.sessions.Session instance to be used. + adjust_price : bool, default False + If True, adjusts all prices in hist_data ('Open', 'High', 'Low', + 'Close') based on 'Adj Close' and 'Yesterday' price. 
+ interval: string, d, w, m for daily, weekly, monthly + """ return TSEReader(*args, **kwargs).read() From d4324c823c1af46329a9bf39e0fb1f9906b4b77e Mon Sep 17 00:00:00 2001 From: alised Date: Wed, 22 Sep 2021 15:59:25 +0330 Subject: [PATCH 8/8] Update documentation to describe adjust price and resample Data in tse --- docs/source/remote_data.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst index 83cb0056..9ead45e0 100644 --- a/docs/source/remote_data.rst +++ b/docs/source/remote_data.rst @@ -780,6 +780,14 @@ An interface to structure the information provided by f = web.DataReader("نوری", "tse", start, end) f.head() + # Adjust prices + f = web.get_data_tse("نوری", start, end, adjust_price=True) + f.head() + + # Resample Close price weekly or monthly + f = web.get_data_tse("نوری", start, end, interval="m") + f.head() + # Multiple series: multi = web.DataReader(["نوری", "برکت"], "tse", start, end) multi.head() \ No newline at end of file