Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 1847336

Browse files
author
Ivan Butygin
committed
tbb concurrent hash example
1 parent 4efe6b0 commit 1847336

File tree

4 files changed

+334
-0
lines changed

4 files changed

+334
-0
lines changed

sdc/_concurrent_hash.cpp

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2020, Intel Corporation All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions are met:
6+
//
7+
// Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
//
10+
// Redistributions in binary form must reproduce the above copyright notice,
11+
// this list of conditions and the following disclaimer in the documentation
12+
// and/or other materials provided with the distribution.
13+
//
14+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
//*****************************************************************************
26+
27+
#include <Python.h>
28+
#include <cstdint>
29+
#include <tbb/concurrent_hash_map.h>
30+
#include <tbb/concurrent_vector.h>
31+
32+
33+
template<typename Key, typename Val>
34+
using hashmap = tbb::concurrent_hash_map<Key,tbb::concurrent_vector<Val>>;
35+
36+
template<typename Key, typename Val>
37+
using iter_range = std::pair<typename hashmap<Key, Val>::iterator, typename hashmap<Key, Val>::iterator>;
38+
39+
using int_hashmap = hashmap<int64_t, size_t>;
40+
using int_hashmap_iters = iter_range<int64_t, size_t>;
41+
42+
extern "C"
43+
{
44+
void* create_int_hashmap()
45+
{
46+
return new int_hashmap;
47+
}
48+
49+
void delete_int_hashmap(void* obj)
50+
{
51+
delete static_cast<int_hashmap*>(obj);
52+
}
53+
54+
void addelem_int_hashmap(void* obj, int64_t key, size_t val)
55+
{
56+
auto& h = *static_cast<int_hashmap*>(obj);
57+
int_hashmap::accessor ac;
58+
h.insert(ac, key);
59+
auto& vec = ac->second;
60+
ac.release();
61+
vec.push_back(val);
62+
// h[key].push_back(val);
63+
}
64+
65+
void* createiter_int_hashmap(void* obj)
66+
{
67+
auto& h = *static_cast<int_hashmap*>(obj);
68+
return new int_hashmap_iters{h.begin(), h.end()};
69+
}
70+
71+
int32_t enditer_int_hashmap(void* it)
72+
{
73+
auto& r = *static_cast<int_hashmap_iters*>(it);
74+
return static_cast<int32_t>(r.first == r.second);
75+
}
76+
77+
void nextiter_int_hashmap(void* it)
78+
{
79+
auto& r = *static_cast<int_hashmap_iters*>(it);
80+
++r.first;
81+
}
82+
83+
int64_t iterkey_int_hashmap(void* it)
84+
{
85+
auto& r = *static_cast<int_hashmap_iters*>(it);
86+
return r.first->first;
87+
}
88+
89+
size_t itersize_int_hashmap(void* it)
90+
{
91+
auto& r = *static_cast<int_hashmap_iters*>(it);
92+
return r.first->second.size();
93+
}
94+
95+
size_t iterelem_int_hashmap(void* it, size_t index)
96+
{
97+
auto& r = *static_cast<int_hashmap_iters*>(it);
98+
return r.first->second[index];
99+
}
100+
101+
void deleteiter_int_hashmap(void* obj)
102+
{
103+
delete static_cast<int_hashmap_iters*>(obj);
104+
}
105+
106+
107+
// Module initialiser for the `hconcurrent_hash` extension.
//
// Creates the module and publishes the address of every native entry point
// defined in this file as an integer attribute named after the function.
//
// Returns the new module, or NULL if the module or any attribute could not
// be created.
PyMODINIT_FUNC PyInit_hconcurrent_hash()
{
    static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT,
        "hconcurrent_hash",
        "No docs",
        -1,
        NULL,
    };
    PyObject* m = PyModule_Create(&moduledef);
    if (m == NULL)
    {
        return NULL;
    }

    // Stores `address` on the module as an integer attribute called `name`.
    // PyObject_SetAttrString does not take over the reference returned by
    // PyLong_FromVoidPtr, so it is dropped here to avoid leaking one integer
    // object per registered function.
    const auto add_address = [m](const char* name, void* address) -> bool
    {
        PyObject* const value = PyLong_FromVoidPtr(address);
        if (value == NULL)
        {
            return false;
        }
        const int status = PyObject_SetAttrString(m, name, value);
        Py_DECREF(value);
        return status == 0;
    };

    // On failure, release the half-initialised module and report the error to
    // the importer instead of returning a module with missing attributes.
#define REGISTER(func) \
    if (!add_address(#func, reinterpret_cast<void*>(&func))) \
    { \
        Py_DECREF(m); \
        return NULL; \
    }

    REGISTER(create_int_hashmap)
    REGISTER(delete_int_hashmap)
    REGISTER(addelem_int_hashmap)

    REGISTER(createiter_int_hashmap)
    REGISTER(enditer_int_hashmap)
    REGISTER(nextiter_int_hashmap)
    REGISTER(iterkey_int_hashmap)
    REGISTER(itersize_int_hashmap)
    REGISTER(iterelem_int_hashmap)
    REGISTER(deleteiter_int_hashmap)
#undef REGISTER

    return m;
}
137+
}

sdc/concurrent_hash.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import numba
2+
import sdc
3+
4+
from numba import types, typing, generated_jit
5+
from numba.extending import models, register_model
6+
from numba.extending import lower_builtin, overload_method, overload, intrinsic
7+
8+
from llvmlite import ir as lir
9+
import llvmlite.binding as ll
10+
from . import hconcurrent_hash
11+
# Register every entry point exposed by the hconcurrent_hash extension with
# llvmlite under its own name. Each module attribute is passed through as the
# symbol's address.
for _symbol_name in (
        'create_int_hashmap',
        'delete_int_hashmap',
        'addelem_int_hashmap',
        'createiter_int_hashmap',
        'enditer_int_hashmap',
        'nextiter_int_hashmap',
        'iterkey_int_hashmap',
        'itersize_int_hashmap',
        'iterelem_int_hashmap',
        'deleteiter_int_hashmap',
):
    ll.add_symbol(_symbol_name, getattr(hconcurrent_hash, _symbol_name))
del _symbol_name
22+
23+
# Numba-side declarations of the native entry points. Each one pairs the
# symbol name with its signature; the map and iterator handles are passed
# around as opaque void pointers.

# --- map lifetime and insertion ---
_create_int_hashmap = types.ExternalFunction(
    "create_int_hashmap",
    types.voidptr())
_delete_int_hashmap = types.ExternalFunction(
    "delete_int_hashmap",
    types.void(types.voidptr))
_addelem_int_hashmap = types.ExternalFunction(
    "addelem_int_hashmap",
    types.void(types.voidptr, types.int64, types.intp))

# --- iteration ---
_createiter_int_hashmap = types.ExternalFunction(
    "createiter_int_hashmap",
    types.voidptr(types.voidptr))
_enditer_int_hashmap = types.ExternalFunction(
    "enditer_int_hashmap",
    types.int32(types.voidptr))
_nextiter_int_hashmap = types.ExternalFunction(
    "nextiter_int_hashmap",
    types.void(types.voidptr))
_iterkey_int_hashmap = types.ExternalFunction(
    "iterkey_int_hashmap",
    types.int64(types.voidptr))
_itersize_int_hashmap = types.ExternalFunction(
    "itersize_int_hashmap",
    types.intp(types.voidptr))
_iterelem_int_hashmap = types.ExternalFunction(
    "iterelem_int_hashmap",
    types.intp(types.voidptr, types.intp))
_deleteiter_int_hashmap = types.ExternalFunction(
    "deleteiter_int_hashmap",
    types.void(types.voidptr))
44+
45+
46+
def create_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
48+
49+
def delete_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
51+
52+
def addelem_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
54+
55+
56+
def createiter_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
58+
59+
def enditer_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
61+
62+
def nextiter_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
64+
65+
def iterkey_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
67+
68+
def itersize_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
70+
71+
def iterelem_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
73+
74+
def deleteiter_int_hashmap():
    """Pure-Python placeholder: does nothing and returns None.

    The compiled behaviour is supplied by the ``@overload`` registered for
    this function in this module.
    """
    return None
76+
77+
@overload(create_int_hashmap)
def create_int_hashmap_overload():
    """Overload of ``create_int_hashmap``: forwards to the external
    ``create_int_hashmap`` symbol and returns its result."""
    def impl():
        return _create_int_hashmap()

    return impl
80+
81+
@overload(delete_int_hashmap)
def delete_int_hashmap_overload(h):
    """Overload of ``delete_int_hashmap``: forwards the handle ``h`` to the
    external ``delete_int_hashmap`` symbol."""
    def impl(h):
        return _delete_int_hashmap(h)

    return impl
84+
85+
@overload(addelem_int_hashmap)
def addelem_int_hashmap_overload(h, key, val):
    """Overload of ``addelem_int_hashmap``: forwards ``h``, ``key`` and
    ``val`` to the external ``addelem_int_hashmap`` symbol."""
    def impl(h, key, val):
        return _addelem_int_hashmap(h, key, val)

    return impl
88+
89+
90+
@overload(createiter_int_hashmap)
def createiter_int_hashmap_overload(h):
    """Overload of ``createiter_int_hashmap``: forwards the handle ``h`` to
    the external ``createiter_int_hashmap`` symbol and returns its result."""
    def impl(h):
        return _createiter_int_hashmap(h)

    return impl
93+
94+
@overload(enditer_int_hashmap)
def enditer_int_hashmap_overload(h):
    """Overload of ``enditer_int_hashmap``: forwards the handle ``h`` to the
    external ``enditer_int_hashmap`` symbol and returns its result."""
    def impl(h):
        return _enditer_int_hashmap(h)

    return impl
97+
98+
@overload(nextiter_int_hashmap)
def nextiter_int_hashmap_overload(h):
    """Overload of ``nextiter_int_hashmap``: forwards the handle ``h`` to the
    external ``nextiter_int_hashmap`` symbol."""
    def impl(h):
        return _nextiter_int_hashmap(h)

    return impl
101+
102+
@overload(iterkey_int_hashmap)
def iterkey_int_hashmap_overload(h):
    """Overload of ``iterkey_int_hashmap``: forwards the handle ``h`` to the
    external ``iterkey_int_hashmap`` symbol and returns its result."""
    def impl(h):
        return _iterkey_int_hashmap(h)

    return impl
105+
106+
@overload(itersize_int_hashmap)
def itersize_int_hashmap_overload(h):
    """Overload of ``itersize_int_hashmap``: forwards the handle ``h`` to the
    external ``itersize_int_hashmap`` symbol and returns its result."""
    def impl(h):
        return _itersize_int_hashmap(h)

    return impl
109+
110+
@overload(iterelem_int_hashmap)
def iterelem_int_hashmap_overload(h, i):
    """Overload of ``iterelem_int_hashmap``: forwards the handle ``h`` and
    index ``i`` to the external ``iterelem_int_hashmap`` symbol and returns
    its result."""
    def impl(h, i):
        return _iterelem_int_hashmap(h, i)

    return impl
113+
114+
@overload(deleteiter_int_hashmap)
def deleteiter_int_hashmap_overload(h):
    """Overload of ``deleteiter_int_hashmap``: forwards the handle ``h`` to
    the external ``deleteiter_int_hashmap`` symbol."""
    def impl(h):
        return _deleteiter_int_hashmap(h)

    return impl

sdc/tests/test_dataframe.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,6 +1791,33 @@ def test_impl():
17911791

17921792
pd.testing.assert_series_equal(hpat_func(), test_impl())
17931793

1794+
def test_tbb(self):
    """Exercise the concurrent-hash bindings from jitted code.

    Inserts four values under key 1 and one value under key 2, walks the
    whole map, and checks aggregates that do not depend on the order in
    which the map yields its keys.
    """
    import sdc.concurrent_hash

    def test_impl():
        h = sdc.concurrent_hash.create_int_hashmap()

        sdc.concurrent_hash.addelem_int_hashmap(h, 1, 2)
        sdc.concurrent_hash.addelem_int_hashmap(h, 1, 3)
        sdc.concurrent_hash.addelem_int_hashmap(h, 1, 4)
        sdc.concurrent_hash.addelem_int_hashmap(h, 1, 5)
        sdc.concurrent_hash.addelem_int_hashmap(h, 2, 6)

        # Order-independent summaries of what the iteration produced.
        elem_count = 0
        key_sum = 0
        weighted_sum = 0

        it = sdc.concurrent_hash.createiter_int_hashmap(h)
        while 0 == sdc.concurrent_hash.enditer_int_hashmap(it):
            key = sdc.concurrent_hash.iterkey_int_hashmap(it)
            sz = sdc.concurrent_hash.itersize_int_hashmap(it)
            key_sum += key
            for i in range(sz):
                val = sdc.concurrent_hash.iterelem_int_hashmap(it, i)
                print(key, val)
                elem_count += 1
                weighted_sum += key * val

            sdc.concurrent_hash.nextiter_int_hashmap(it)

        sdc.concurrent_hash.deleteiter_int_hashmap(it)
        sdc.concurrent_hash.delete_int_hashmap(h)
        return elem_count, key_sum, weighted_sum

    hpat_func = self.jit(test_impl)
    elem_count, key_sum, weighted_sum = hpat_func()

    # 5 values in total; keys 1 and 2 each visited once;
    # 1 * (2 + 3 + 4 + 5) + 2 * 6 == 26.
    self.assertEqual(elem_count, 5)
    self.assertEqual(key_sum, 3)
    self.assertEqual(weighted_sum, 26)
1820+
17941821

17951822
if __name__ == "__main__":
17961823
unittest.main()

setup.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# *****************************************************************************
2727

2828
from setuptools import setup, Extension, find_packages, Command
29+
import sys
2930
import platform
3031
import os
3132
from docs.source.buildscripts.sdc_build_doc import SDCBuildDoc
@@ -197,6 +198,59 @@ def readme():
197198
if _has_opencv:
198199
_ext_mods.append(ext_cv_wrapper)
199200

201+
# Copypaste from numba
202+
def check_file_at_path(path2file):
    """
    Look for a file below a set of candidate installation prefixes.

    Takes a list as a path, a single glob (*) is permitted as an entry which
    indicates that expansion at this location is required (i.e. version
    might not be known).

    The candidate prefixes are, in order: the directory two levels above
    ``sys.executable``, then the ``CONDA_PREFIX`` and ``PREFIX`` environment
    variables; on Windows the ``Library`` sub-directory of each of those is
    appended as well.

    Returns the prefix under which the file was found, or None. When several
    prefixes match, the last one in the order above is returned.
    """
    found = None
    path2check = [os.path.split(os.path.split(sys.executable)[0])[0]]
    path2check += [os.getenv(n, '') for n in ['CONDA_PREFIX', 'PREFIX']]
    # Discard unset/empty prefixes before deriving the Windows 'Library'
    # variants; otherwise an empty prefix becomes the relative path 'Library'
    # and the current working directory gets searched by accident.
    path2check = [p for p in path2check if p]
    if sys.platform.startswith('win'):
        path2check += [os.path.join(p, 'Library') for p in path2check]

    for p in path2check:
        if '*' in path2file:
            globloc = path2file.index('*')
            # Joining onto p directly also covers a leading '*' (empty head).
            searchroot = os.path.join(p, *path2file[:globloc])
            try:
                potential_locs = os.listdir(searchroot)
            except OSError:
                # Missing or unreadable directory: this prefix has no match.
                continue
            searchfor = path2file[globloc + 1:]
            for x in potential_locs:
                potpath = os.path.join(searchroot, x, *searchfor)
                if os.path.isfile(potpath):
                    found = p  # the latest is used
        elif os.path.isfile(os.path.join(p, *path2file)):
            found = p  # the latest is used
    return found
230+
231+
# Search for Intel TBB, first check env var TBBROOT then conda locations
tbb_root = os.getenv('TBBROOT')
if not tbb_root:
    tbb_root = check_file_at_path(['include', 'tbb', 'tbb.h'])

if not tbb_root:
    # Without a TBB location the os.path.join calls below would fail with an
    # opaque TypeError on None; stop here with an actionable message instead.
    raise RuntimeError(
        "Intel TBB was not found: set the TBBROOT environment variable or "
        "install TBB so that include/tbb/tbb.h exists under the Python/conda prefix")

print("Using Intel TBB from:", tbb_root)

# Native extension holding the concurrent hash map example.
ext_hconcurrent_hash = Extension(
    name="sdc.hconcurrent_hash",
    sources=["sdc/_concurrent_hash.cpp"],
    include_dirs=[os.path.join(tbb_root, 'include')],
    libraries=['tbb'],
    library_dirs=[
        # for Linux
        os.path.join(tbb_root, 'lib', 'intel64', 'gcc4.4'),
        # for MacOS
        os.path.join(tbb_root, 'lib'),
        # for Windows
        os.path.join(tbb_root, 'lib', 'intel64', 'vc_mt'),
    ],
    language="c++",
)

_ext_mods.append(ext_hconcurrent_hash)
200254

201255
class style(Command):
202256
""" Command to check and adjust code style

0 commit comments

Comments
 (0)