Skip to content

feat: Added support for ADDHASH command #69

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 6, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion API.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,25 @@ Add a single document to the index.
NOTE: Geo points should be encoded as strings of "lon,lat"


### add\_document\_hash
```py

def add_document_hash(self, doc_id, score=1.0, language=None, replace=False)

```



Add a hash document to the index.

### Parameters

- **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs.
- **score**: the document ranking, between 0.0 and 1.0
- **replace**: if True, and the document already is in the index, we perform an update and reindex the document
- **language**: Specify the language used for document tokenization.


### aggregate
```py

Expand Down Expand Up @@ -264,7 +283,7 @@ Create the search index. The index must not already exist.
### delete\_document
```py

def delete_document(self, doc_id, conn=None)
def delete_document(self, doc_id, conn=None, delete_actual_document=False)

```

Expand All @@ -273,6 +292,9 @@ def delete_document(self, doc_id, conn=None)
Delete a document from index
Returns 1 if the document was deleted, 0 if not

### Parameters

- **delete_actual_document**: if set to True, RediSearch also deletes the actual document if it is in the index

### drop\_index
```py
Expand Down Expand Up @@ -361,6 +383,18 @@ def add_document(self, doc_id, nosave=False, score=1.0, payload=None, replace=Fa
Add a document to the batch query


### add\_document\_hash
```py

def add_document_hash(self, doc_id, score=1.0, language=None, replace=False)

```



Add a hash document to the batch query


### commit
```py

Expand Down
3 changes: 1 addition & 2 deletions gendoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,4 @@ def generatedocs(module):
print("Error while trying to import " + module)

if __name__ == '__main__':
    # The hunk kept both the Python 2 statement form `print generatedocs(...)`
    # (a SyntaxError on Python 3) and the fixed call form; only the
    # Python-3-compatible call is kept.
    print(generatedocs(sys.argv[1]))
93 changes: 76 additions & 17 deletions redisearch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Field(object):
def __init__(self, name, *args):
self.name = name
self.args = args

def redis_args(self):
return [self.name] + list(self.args)

Expand Down Expand Up @@ -87,14 +87,15 @@ class TagField(Field):
TagField is a tag-indexing field with simpler compression and tokenization.
See http://redisearch.io/Tags/
"""
def __init__(self, name, separator = ',', no_index=False):

def __init__(self, name, separator=',', no_index=False):
    """
    Build a TAG field definition.

    - **separator**: the character splitting individual tags (',' by default).
    - **no_index**: when True the field is stored but excluded from indexing.
    """
    flags = [Field.TAG, Field.SEPARATOR, separator]
    if no_index:
        flags.append(Field.NOINDEX)
    Field.__init__(self, name, *flags)


class Client(object):
"""
Expand All @@ -108,6 +109,7 @@ class Client(object):
ALTER_CMD = 'FT.ALTER'
SEARCH_CMD = 'FT.SEARCH'
ADD_CMD = 'FT.ADD'
ADDHASH_CMD = "FT.ADDHASH"
DROP_CMD = 'FT.DROP'
EXPLAIN_CMD = 'FT.EXPLAIN'
DEL_CMD = 'FT.DEL'
Expand All @@ -120,7 +122,6 @@ class Client(object):
GET_CMD = 'FT.GET'
MGET_CMD = 'FT.MGET'


NOOFFSETS = 'NOOFFSETS'
NOFIELDS = 'NOFIELDS'
STOPWORDS = 'STOPWORDS'
Expand Down Expand Up @@ -156,6 +157,20 @@ def add_document(self, doc_id, nosave=False, score=1.0, payload=None,
if self.current_chunk >= self.chunk_size:
self.commit()

def add_document_hash(
    self, doc_id, score=1.0, replace=False, language=None,
):
    """
    Add a hash document to the batch query.

    ### Parameters

    - **doc_id**: the document's id, an existing HASH key in Redis holding
      the fields the index needs.
    - **score**: the document ranking, between 0.0 and 1.0.
    - **replace**: if True, and the document already is in the index, we
      perform an update and reindex the document.
    - **language**: language used for document tokenization. Added last to
      stay backward compatible with positional callers; the internal
      `_add_document_hash` and the client-level `add_document_hash`
      already support it, so the batch path forwards it too.
    """
    self.client._add_document_hash(
        doc_id, conn=self.pipeline, score=score,
        language=language, replace=replace,
    )
    self.current_chunk += 1
    self.total += 1
    # Auto-flush once the pipeline has accumulated a full chunk.
    if self.current_chunk >= self.chunk_size:
        self.commit()

def commit(self):
"""
Manually commit and flush the batch indexing query
Expand All @@ -182,7 +197,7 @@ def batch_indexer(self, chunk_size=100):
return Client.BatchIndexer(self, chunk_size=chunk_size)

def create_index(self, fields, no_term_offsets=False,
no_field_flags=False, stopwords = None):
no_field_flags=False, stopwords=None):
"""
Create the search index. The index must not already exist.

Expand All @@ -203,7 +218,7 @@ def create_index(self, fields, no_term_offsets=False,
args += [self.STOPWORDS, len(stopwords)]
if len(stopwords) > 0:
args += list(stopwords)

args.append('SCHEMA')

args += list(itertools.chain(*(f.redis_args() for f in fields)))
Expand All @@ -230,7 +245,7 @@ def drop_index(self):
Drop the index if it exists
"""
return self.redis.execute_command(self.DROP_CMD, self.index_name)

def _add_document(self, doc_id, conn=None, nosave=False, score=1.0, payload=None,
replace=False, partial=False, language=None, no_create=False, **fields):
"""
Expand Down Expand Up @@ -260,6 +275,25 @@ def _add_document(self, doc_id, conn=None, nosave=False, score=1.0, payload=None
args += list(itertools.chain(*fields.items()))
return conn.execute_command(*args)

def _add_document_hash(
    self, doc_id, conn=None, score=1.0, language=None, replace=False,
):
    """
    Internal helper shared by single-document and batch hash indexing.

    Builds an FT.ADDHASH command for ``doc_id`` and runs it on ``conn``,
    falling back to the client's own Redis connection when ``conn`` is None.
    """
    connection = self.redis if conn is None else conn

    command = [self.ADDHASH_CMD, self.index_name, doc_id, score]
    if replace:
        command.append("REPLACE")
    if language:
        command.extend(["LANGUAGE", language])

    return connection.execute_command(*command)

def add_document(self, doc_id, nosave=False, score=1.0, payload=None,
                 replace=False, partial=False, language=None,
                 no_create=False, **fields):
    """
    Add a single document to the index.

    ### Parameters

    - **doc_id**: the document's id.
    - **nosave**: if True, only index the document without saving a copy.
    - **score**: the document ranking, between 0.0 and 1.0.
    - **payload**: optional payload stored alongside the document.
    - **replace**: if True, and the document already is in the index, we
      perform an update and reindex the document.
    - **partial**: if True, only the given fields are updated.
    - **language**: Specify the language used for document tokenization.
    - **no_create**: if True, do not create the document if it is missing.
    - **fields** kwargs dictionary of the document fields to be saved
      and/or indexed.
      NOTE: Geo points should be encoded as strings of "lon,lat"
    """
    # The diff hunk carried duplicated old/new copies of this delegation;
    # a single, properly indented call is kept.
    return self._add_document(doc_id, conn=None, nosave=nosave, score=score,
                              payload=payload, replace=replace,
                              partial=partial, language=language,
                              no_create=no_create, **fields)

def add_document_hash(
    self, doc_id, score=1.0, language=None, replace=False,
):
    """
    Index an existing Redis HASH key as a document.

    ### Parameters

    - **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs.
    - **score**: the document ranking, between 0.0 and 1.0
    - **replace**: if True, and the document already is in the index, we perform an update and reindex the document
    - **language**: Specify the language used for document tokenization.
    """
    # Delegate to the shared internal helper on the default connection.
    return self._add_document_hash(
        doc_id,
        conn=None,
        score=score,
        language=language,
        replace=replace,
    )

def delete_document(self, doc_id, conn=None, delete_actual_document=False):
    """
    Delete a document from index.
    Returns 1 if the document was deleted, 0 if not.

    ### Parameters

    - **conn**: optional connection or pipeline to execute on; defaults to
      the client's own Redis connection.
    - **delete_actual_document**: if set to True, RediSearch also deletes
      the actual document if it is in the index (FT.DEL 'DD' option).
    """
    args = [self.DEL_CMD, self.index_name, doc_id]
    if conn is None:
        conn = self.redis
    if delete_actual_document:
        args.append('DD')

    # The hunk retained the stale pre-change return (FT.DEL without *args),
    # which would have executed first and silently dropped the 'DD' option;
    # only the args-based call is kept.
    return conn.execute_command(*args)

def load_document(self, id):
"""
Expand All @@ -315,12 +373,12 @@ def load_document(self, id):
def get(self, *ids):
"""
Returns the full contents of multiple documents.

### Parameters

- **ids**: the ids of the saved documents.
"""

return self.redis.execute_command('FT.MGET', self.index_name, *ids)

def info(self):
Expand Down Expand Up @@ -386,7 +444,8 @@ def aggregate(self, query):
elif isinstance(query, Cursor):
has_schema = False
has_cursor = True
cmd = [self.CURSOR_CMD, 'READ', self.index_name] + query.build_args()
cmd = [self.CURSOR_CMD, 'READ',
self.index_name] + query.build_args()
else:
raise ValueError('Bad query', query)

Expand All @@ -401,7 +460,7 @@ def aggregate(self, query):
else:
cursor = None

if query._with_schema:
if isinstance(query, AggregateRequest) and query._with_schema:
schema = raw[0]
rows = raw[2:]
else:
Expand Down