{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import dolphindb as ddb\n",
    "\n",
    "# Open a session against the local DolphinDB node; the cell output shows connect() evaluating to True.\n",
    "s = ddb.session()\n",
    "s.connect(\"localhost\", 8848, 'admin', '123456')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dolphindb as ddb\n",
    "import dolphindb.settings as keys\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from pprint import pprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "array(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',\n",
      "       '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',\n",
      "       '2012-01-09', '2012-01-10'], dtype='datetime64[D]')\n"
     ]
    }
   ],
   "source": [
    "s = ddb.session()\n",
    "s.connect('localhost', 8848, 'admin', '123456')\n",
    "\n",
    "# Ten consecutive days, used below as the VALUE partition scheme.\n",
    "dates = np.array(pd.date_range(start='20120101', end='20120110'), dtype='datetime64[D]')\n",
    "pprint(dates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "dbPath = \"dfs://tsdb\"\n",
    "# Drop any existing database first so this cell can be re-run.\n",
    "if s.existsDatabase(dbPath):\n",
    "    s.dropDatabase(dbPath)\n",
    "\n",
    "# TODO:\n",
    "# 1. change `partitionType` to keys.COMPO\n",
    "# 2. specify the partition hierarchy\n",
    "db = s.database(\n",
    "    dbName='mydb_tsdb',\n",
    "    partitionType=keys.VALUE,\n",
    "    partitions=dates,\n",
    "    dbPath=dbPath,\n",
    "    engine=\"TSDB\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    datetime sym  val\n",
      "0 2012-01-01  AA    1\n",
      "1 2012-01-02  BB    2\n",
      "2 2012-01-04  BB    3\n",
      "3 2012-01-05  AA    4\n",
      "4 2012-01-08  BB    5\n"
     ]
    }
   ],
   "source": [
    "# Five sample rows; a single np.array call is enough (the extra wrapping np.array was redundant).\n",
    "df = pd.DataFrame({\n",
    "    'datetime': np.array(\n",
    "        ['2012-01-01T00:00:00', '2012-01-02T00:00:00', '2012-01-04T00:00:00', '2012-01-05T00:00:00', '2012-01-08T00:00:00'],\n",
    "        dtype='datetime64'),\n",
    "    'sym': ['AA', 'BB', 'BB', 'AA', 'BB'],\n",
    "    'val': range(1, 6)\n",
    "})\n",
    "pprint(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    '2012-01-01T00:00:00', '20120102-01-01T00:00:00',\n",
       "           '2012-01-04T00:00:00',     '2012-01-05T00:00:00',\n",
       "           '2012-01-08T00:00:00'], dtype='datetime64[s]')"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gotcha: the compact form '20120102' is NOT read as 2012-01-02. As the output\n",
    "# shows, it is parsed as the year 20120102 -- write dates as 'YYYY-MM-DD'.\n",
    "np.array(\n",
    "    ['2012-01-01', '20120102', '2012-01-04T00:00:00', '2012-01-05T00:00:00', '2012-01-08T00:00:00'],\n",
    "    dtype='datetime64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = s.table(data=df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "pt = db.createPartitionedTable(table=t, tableName='pt', partitionColumns='datetime', sortColumns=[\"sym\", \"datetime\"])\n",
    "pt.append(t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    datetime sym  val\n",
      "0 2012-01-01  AA    1\n",
      "1 2012-01-01  AA    1\n",
      "2 2012-01-02  BB    2\n",
      "3 2012-01-02  BB    2\n",
      "4 2012-01-04  BB    3\n",
      "5 2012-01-04  BB    3\n",
      "6 2012-01-05  AA    4\n",
      "7 2012-01-05  AA    4\n",
      "8 2012-01-08  BB    5\n",
      "9 2012-01-08  BB    5\n"
     ]
    }
   ],
   "source": [
    "# `result` rather than `re`, so the name of the stdlib regex module is not shadowed.\n",
    "result = s.loadTable(tableName='pt', dbPath=dbPath).toDF()\n",
    "pprint(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "s = ddb.session()\n",
    "s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
    "# Build a 100000-row in-memory table on the server for the block-fetch demo below.\n",
    "script='''\n",
    "    rows=100000;\n",
    "    testblock=table(\n",
    "        take(1,rows) as id,\n",
    "        take(`A,rows) as symbol,\n",
    "        take(2020.08.01..2020.10.01,rows) as date,\n",
    "        rand(50,rows) as size,\n",
    "        rand(50.5,rows) as price\n",
    "    );\n",
    "'''\n",
    "s.run(script)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  8192, iter: 1\n",
      " 16384, iter: 2\n",
      " 24576, iter: 3\n",
      " 32768, iter: 4\n",
      " 40960, iter: 5\n",
      " 49152, iter: 6\n",
      " 57344, iter: 7\n",
      " 65536, iter: 8\n",
      " 73728, iter: 9\n",
      " 81920, iter: 10\n",
      " 90112, iter: 11\n",
      " 98304, iter: 12\n",
      "100000, iter: 13\n",
      "total= 100000\n"
     ]
    }
   ],
   "source": [
    "script1='''\n",
    "select * from testblock\n",
    "'''\n",
    "# With fetchSize set, run() hands back a block reader that is drained with hasNext()/read().\n",
    "block = s.run(script1, fetchSize=8192)\n",
    "total = 0\n",
    "n_batches = 0  # not `iter`, which would shadow the builtin\n",
    "while block.hasNext():\n",
    "    chunk = block.read()\n",
    "    total += len(chunk)\n",
    "    n_batches += 1\n",
    "    # Fixed field width: the previous spec `{total: {total}}` used the running\n",
    "    # total itself as the width, padding every line to thousands of characters.\n",
    "    print(f\"{total:>6}, iter: {n_batches}\")\n",
    "\n",
    "print(\"total=\", total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import dolphindb as ddb\n",
    "import numpy as np\n",
    "\n",
    "s = ddb.session()\n",
    "s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
    "script='''\n",
    "dbPath = \"dfs://tableAppender\"\n",
    "\n",
    "if(existsDatabase(dbPath))\n",
    "    dropDatabase(dbPath)\n",
    "\n",
    "t = table(\n",
    "    1000:0,\n",
    "    `sym`date`month`time`minute`second`datetime`timestamp`nanotimestamp`qty,\n",
    "    [SYMBOL, DATE,MONTH,TIME,MINUTE,SECOND,DATETIME,TIMESTAMP,NANOTIMESTAMP, INT]\n",
    "    )\n",
    "\n",
    "db=database(dbPath,RANGE,100000 200000 300000 400000 600001)\n",
    "\n",
    "pt = db.createPartitionedTable(t, `pt, `qty)\n",
    "'''\n",
    "s.run(script)\n",
    "\n",
    "appender = ddb.tableAppender(\"dfs://tableAppender\",\"pt\", s)\n",
    "\n",
    "# Every column below has 500000 entries (pattern length x tile count), matching `qty`.\n",
    "sym = list(map(str, np.arange(100000, 600000)))\n",
    "\n",
    "date = np.array(np.tile(['2012-01-01', 'NaT', '1965-07-25', 'NaT', '2020-12-23', '1970-01-01', 'NaT', 'NaT', 'NaT', '2009-08-05'],50000), dtype=\"datetime64[D]\")\n",
    "month = np.array(np.tile(['1965-08', 'NaT','2012-02', '2012-03', 'NaT'],100000), dtype=\"datetime64\")\n",
    "time = np.array(np.tile(['2012-01-01T00:00:00.000', '2015-08-26T05:12:48.426', 'NaT', 'NaT', '2015-06-09T23:59:59.999'],100000), dtype=\"datetime64\")\n",
    "second = np.array(np.tile(['2012-01-01T00:00:00', '2015-08-26T05:12:48', 'NaT', 'NaT', '2015-06-09T23:59:59'],100000), dtype=\"datetime64\")\n",
    "\n",
    "nanotime = np.array(np.tile(['2012-01-01T00:00:00.000000000', '2015-08-26T05:12:48.008007006', 'NaT', 'NaT', '2015-06-09T23:59:59.999008007'],100000), dtype=\"datetime64\")\n",
    "\n",
    "qty = np.arange(100000, 600000)\n",
    "\n",
    "# `time` is reused for the minute and timestamp columns, `second` for the datetime column.\n",
    "data = pd.DataFrame({'sym': sym, 'date': date, 'month':month, 'time':time, 'minute':time, 'second':second, 'datetime':second, 'timestamp':time, 'nanotimestamp':nanotime, 'qty': qty})\n",
    "num = appender.append(data)\n",
    "\n",
    "print(num)\n",
    "print(s.run(\"select * from pt\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note**: In asynchronous mode, only the method `session.run()` is supported to communicate with the server and no value is returned.\n",
    "\n",
    "The asynchronous mode shows better performance with higher data throughput. The following example writes to a stream table. For details on Python Streaming API, see Chap 10."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dolphindb as ddb\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import datetime\n",
    "\n",
    "s = ddb.session(enableASYN=True)\n",
    "s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
    "\n",
    "n = 100\n",
    "\n",
    "script = \"\"\"trades = streamTable(10000:0,`time`sym`price`id, [TIMESTAMP,SYMBOL,DOUBLE,INT])\"\"\"\n",
    "s.run(script) # The script above can be executed on the server\n",
    "\n",
    "# Randomly generate a DataFrame\n",
    "sym_list = ['IBN', 'GTYU', 'FHU', 'DGT', 'FHU', 'YUG', 'EE', 'ZD', 'FYU']\n",
    "price_list = []\n",
    "time_list = []\n",
    "for i in range(n):\n",
    "    price_list.append(round(np.random.uniform(1, 100), 1))\n",
    "    time_list.append(np.datetime64(datetime.date(2020, random.randint(1, 12), random.randint(1, 20))))\n",
    "\n",
    "tb = pd.DataFrame({'time': time_list,\n",
    "                   'sym': np.random.choice(sym_list, n),\n",
    "                   'price': price_list,\n",
    "                   'id': np.random.choice([1, 2, 3, 4, 5], n)})\n",
    "\n",
    "s.run(\"append!{trades}\", tb)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}