You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
355 lines
731 KiB
355 lines
731 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"True"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import dolphindb as ddb\n",
|
|
"s = ddb.session()\n",
|
|
"s.connect(\"localhost\", 8848, 'admin', '123456')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import dolphindb as ddb\n",
|
|
"import dolphindb.settings as keys\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"from pprint import pprint"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"array(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',\n",
|
|
" '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',\n",
|
|
" '2012-01-09', '2012-01-10'], dtype='datetime64[D]')\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"s = ddb.session()\n",
|
|
"s.connect('localhost', 8848, 'admin', '123456')\n",
|
|
"\n",
|
|
"dates = np.array(pd.date_range(start='20120101', end='20120110'), dtype='datetime64[D]')\n",
|
|
"pprint(dates)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dbPath = \"dfs://tsdb\"\n",
|
|
"if s.existsDatabase(dbPath): s.dropDatabase(dbPath)\n",
|
|
"\n",
|
|
"# TODO: \n",
|
|
"# 1. change `partitionType` to keys.COMPO\n",
|
|
"# 2. specify the partition hierarchy\n",
|
|
"db = s.database(\n",
|
|
" dbName='mydb_tsdb', \n",
|
|
" partitionType=keys.VALUE, \n",
|
|
" partitions=dates, \n",
|
|
" dbPath=dbPath,\n",
|
|
" engine=\"TSDB\")\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" datetime sym val\n",
|
|
"0 2012-01-01 AA 1\n",
|
|
"1 2012-01-02 BB 2\n",
|
|
"2 2012-01-04 BB 3\n",
|
|
"3 2012-01-05 AA 4\n",
|
|
"4 2012-01-08 BB 5\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df = pd.DataFrame({\n",
|
|
" 'datetime' : np.array(\n",
|
|
" np.array(\n",
|
|
" ['2012-01-01T00:00:00', '2012-01-02T00:00:00', '2012-01-04T00:00:00', '2012-01-05T00:00:00', '2012-01-08T00:00:00'],\n",
|
|
" dtype='datetime64')\n",
|
|
" ),\n",
|
|
" 'sym' : ['AA', 'BB', 'BB', 'AA', 'BB'],\n",
|
|
" 'val' : range(1, 6)\n",
|
|
"})\n",
|
|
"pprint(df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([ '2012-01-01T00:00:00', '20120102-01-01T00:00:00',\n",
|
|
" '2012-01-04T00:00:00', '2012-01-05T00:00:00',\n",
|
|
" '2012-01-08T00:00:00'], dtype='datetime64[s]')"
|
|
]
|
|
},
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"np.array(\n",
|
|
" ['2012-01-01', '20120102', '2012-01-04T00:00:00', '2012-01-05T00:00:00', '2012-01-08T00:00:00'],\n",
|
|
" dtype='datetime64')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"t = s.table(data=df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pt = db.createPartitionedTable(table=t, tableName='pt', partitionColumns='datetime', sortColumns=[\"sym\", \"datetime\"])\n",
|
|
"pt.append(t)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" datetime sym val\n",
|
|
"0 2012-01-01 AA 1\n",
|
|
"1 2012-01-01 AA 1\n",
|
|
"2 2012-01-02 BB 2\n",
|
|
"3 2012-01-02 BB 2\n",
|
|
"4 2012-01-04 BB 3\n",
|
|
"5 2012-01-04 BB 3\n",
|
|
"6 2012-01-05 AA 4\n",
|
|
"7 2012-01-05 AA 4\n",
|
|
"8 2012-01-08 BB 5\n",
|
|
"9 2012-01-08 BB 5\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"re = s.loadTable(tableName='pt', dbPath=dbPath).toDF()\n",
|
|
"pprint(re)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"s = ddb.session()\n",
|
|
"s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
|
|
"script='''\n",
|
|
" rows=100000;\n",
|
|
" testblock=table(\n",
|
|
" take(1,rows) as id,\n",
|
|
" take(`A,rows) as symbol,\n",
|
|
" take(2020.08.01..2020.10.01,rows) as date, \n",
|
|
" rand(50,rows) as size,\n",
|
|
" rand(50.5,rows) as price\n",
|
|
" );\n",
|
|
"'''\n",
|
|
"s.run(script)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 8192, iter: 1\n",
|
|
" 16384, iter: 2\n",
|
|
" 24576, iter: 3\n",
|
|
" 32768, iter: 4\n",
|
|
" 40960, iter: 5\n",
|
|
" 49152, iter: 6\n",
|
|
" 57344, iter: 7\n",
|
|
" 65536, iter: 8\n",
|
|
" 73728, iter: 9\n",
|
|
" 81920, iter: 10\n",
|
|
" 90112, iter: 11\n",
|
|
" 98304, iter: 12\n",
|
|
" 100000, iter: 13\n",
|
|
"total= 100000\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"script1='''\n",
|
|
"select * from testblock\n",
|
|
"'''\n",
|
|
"block= s.run(script1, fetchSize = 8192)\n",
|
|
"total = 0\n",
|
|
"iter = 0\n",
|
|
"while block.hasNext():\n",
|
|
" tem = block.read() \n",
|
|
" total += len(tem)\n",
|
|
" iter += 1\n",
|
|
" print(f\"{total: {total}}, iter: {iter}\")\n",
|
|
" \n",
|
|
"print(\"total=\", total)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import dolphindb as ddb\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"s = ddb.session()\n",
|
|
"s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
|
|
"script='''\n",
|
|
"dbPath = \"dfs://tableAppender\"\n",
|
|
"\n",
|
|
"if(existsDatabase(dbPath))\n",
|
|
" dropDatabase(dbPath)\n",
|
|
"\n",
|
|
"t = table(\n",
|
|
" 1000:0, \n",
|
|
" `sym`date`month`time`minute`second`datetime`timestamp`nanotimestamp`qty, \n",
|
|
" [SYMBOL, DATE,MONTH,TIME,MINUTE,SECOND,DATETIME,TIMESTAMP,NANOTIMESTAMP, INT]\n",
|
|
" )\n",
|
|
"\n",
|
|
"db=database(dbPath,RANGE,100000 200000 300000 400000 600001)\n",
|
|
"\n",
|
|
"pt = db.createPartitionedTable(t, `pt, `qty)\n",
|
|
"'''\n",
|
|
"s.run(script)\n",
|
|
"\n",
|
|
"appender = ddb.tableAppender(\"dfs://tableAppender\",\"pt\", s)\n",
|
|
"\n",
|
|
"sym = list(map(str, np.arange(100000, 600000)))\n",
|
|
"\n",
|
|
"date = np.array(np.tile(['2012-01-01', 'NaT', '1965-07-25', 'NaT', '2020-12-23', '1970-01-01', 'NaT', 'NaT', 'NaT', '2009-08-05'],50000), dtype=\"datetime64[D]\")\n",
|
|
"month = np.array(np.tile(['1965-08', 'NaT','2012-02', '2012-03', 'NaT'],100000), dtype=\"datetime64\")\n",
|
|
"time = np.array(np.tile(['2012-01-01T00:00:00.000', '2015-08-26T05:12:48.426', 'NaT', 'NaT', '2015-06-09T23:59:59.999'],100000), dtype=\"datetime64\")\n",
|
|
"second = np.array(np.tile(['2012-01-01T00:00:00', '2015-08-26T05:12:48', 'NaT', 'NaT', '2015-06-09T23:59:59'],100000), dtype=\"datetime64\")\n",
|
|
"\n",
|
|
"nanotime = np.array(np.tile(['2012-01-01T00:00:00.000000000', '2015-08-26T05:12:48.008007006', 'NaT', 'NaT', '2015-06-09T23:59:59.999008007'],100000), dtype=\"datetime64\")\n",
|
|
"\n",
|
|
"qty = np.arange(100000, 600000)\n",
|
|
"\n",
|
|
"data = pd.DataFrame({'sym': sym, 'date': date, 'month':month, 'time':time, 'minute':time, 'second':second, 'datetime':second, 'timestamp':time, 'nanotimestamp':nanotime, 'qty': qty})\n",
|
|
"num = appender.append(data)\n",
|
|
"\n",
|
|
"print(num)\n",
|
|
"print(s.run(\"select * from pt\"))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Note**: In asynchronous mode, only the method `session.run()` is supported to communicate with the server and no value is returned.\n",
|
|
"\n",
|
|
"The asynchronous mode shows better performance with higher data throughput. The following example writes to a stream table. For details on Python Streaming API, see Chap 10."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import dolphindb as ddb\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import random\n",
|
|
"import datetime\n",
|
|
"\n",
|
|
"s = ddb.session(enableASYN=True)\n",
|
|
"s.connect(\"localhost\", 8848, \"admin\", \"123456\")\n",
|
|
"\n",
|
|
"n = 100\n",
|
|
"\n",
|
|
"script = \"\"\"trades = streamTable(10000:0,`time`sym`price`id, [TIMESTAMP,SYMBOL,DOUBLE,INT])\"\"\"\n",
|
|
"s.run(script) # The script above can be executed on the server\n",
|
|
"\n",
|
|
"# Randomly generate a DataFrame\n",
|
|
"sym_list = ['IBN', 'GTYU', 'FHU', 'DGT', 'FHU', 'YUG', 'EE', 'ZD', 'FYU']\n",
|
|
"price_list = []\n",
|
|
"time_list = []\n",
|
|
"for i in range(n):\n",
|
|
" price_list.append(round(np.random.uniform(1, 100), 1))\n",
|
|
" time_list.append(np.datetime64(datetime.date(2020, random.randint(1, 12), random.randint(1, 20))))\n",
|
|
"\n",
|
|
"tb = pd.DataFrame({'time': time_list,\n",
|
|
" 'sym': np.random.choice(sym_list, n),\n",
|
|
" 'price': price_list,\n",
|
|
" 'id': np.random.choice([1, 2, 3, 4, 5], n)})\n",
|
|
"\n",
|
|
"s.run(\"append!{trades}\", tb)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|