From 4f2275ec8a6e3546c4251db9e9938f7b3fd29e68 Mon Sep 17 00:00:00 2001 From: gitCommitWiL <43019766+gitCommitWiL@users.noreply.github.com> Date: Thu, 26 Mar 2020 07:22:16 -0400 Subject: [PATCH] Updated for raspberry pi and fixed some other issues - updated spaCy requirements - included install script for raspi - made changes to spaCy loading to save memory - by default, have best_match adapter allow repeated responses, with option to disable --- README.md | 6 ++++++ chatterbot/chatterbot.py | 15 +++++++++++---- chatterbot/comparisons.py | 5 ++--- chatterbot/logic/best_match.py | 21 +++++++++++++-------- chatterbot/singleton_classes.py | 16 ++++++++++++++++ chatterbot/storage/mongodb.py | 2 ++ chatterbot/tagging.py | 5 ++--- install.sh | 19 +++++++++++++++++++ requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 3 +-- 11 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 chatterbot/singleton_classes.py create mode 100644 install.sh diff --git a/README.md b/README.md index 00c30fffc..28d66cc6d 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,12 @@ This package can be installed from [PyPi](https://pypi.python.org/pypi/ChatterBo pip install chatterbot ``` +For Raspberry Pi users: download or clone the repo and then run the install script: + +``` +./install.sh +``` + ## Basic Usage ``` diff --git a/chatterbot/chatterbot.py b/chatterbot/chatterbot.py index ee0d9085f..9ff965a0a 100644 --- a/chatterbot/chatterbot.py +++ b/chatterbot/chatterbot.py @@ -127,10 +127,11 @@ def get_response(self, statement=None, **kwargs): setattr(response, response_key, response_value) if not self.read_only: - self.learn_response(input_statement) + # want to learn that response is valid for input statement + self.learn_response(response, input_statement) - # Save the response generated for the input - self.storage.create(**response.serialize()) + # also save the input statement + self.storage.create(**input_statement.serialize()) return response @@ -211,6 +212,8 @@ def 
learn_response(self, statement, previous_statement=None): """ Learn that the statement provided is a valid response. """ + if not statement.search_text: + statement.search_text = self.storage.tagger.get_text_index_string(statement.text) if not previous_statement: previous_statement = statement.in_response_to @@ -223,15 +226,19 @@ def learn_response(self, statement, previous_statement=None): if not isinstance(previous_statement, (str, type(None), )): statement.in_response_to = previous_statement.text + if not statement.search_in_response_to: + statement.search_in_response_to = previous_statement.search_text elif isinstance(previous_statement, str): statement.in_response_to = previous_statement + if not statement.search_in_response_to: + statement.search_in_response_to = self.storage.tagger.get_text_index_string(previous_statement) self.logger.info('Adding "{}" as a response to "{}"'.format( statement.text, previous_statement_text )) - # Save the input statement + # Save the response return self.storage.create(**statement.serialize()) def get_latest_response(self, conversation): diff --git a/chatterbot/comparisons.py b/chatterbot/comparisons.py index 5c5cd26dc..2de6c0fc6 100644 --- a/chatterbot/comparisons.py +++ b/chatterbot/comparisons.py @@ -3,7 +3,7 @@ designed to compare one statement to another. 
""" from difflib import SequenceMatcher - +from chatterbot import singleton_classes class Comparator: @@ -63,9 +63,8 @@ class SpacySimilarity(Comparator): def __init__(self, language): super().__init__(language) - import spacy - self.nlp = spacy.load(self.language.ISO_639_1) + self.nlp = singleton_classes.singleSpacy.getInstance(language) def compare(self, statement_a, statement_b): """ diff --git a/chatterbot/logic/best_match.py b/chatterbot/logic/best_match.py index fa6e98999..e2044a2db 100644 --- a/chatterbot/logic/best_match.py +++ b/chatterbot/logic/best_match.py @@ -21,6 +21,7 @@ def __init__(self, chatbot, **kwargs): super().__init__(chatbot, **kwargs) self.excluded_words = kwargs.get('excluded_words') + self.exclude_recent_repeated = kwargs.get('exclude_recent_repeated') def process(self, input_statement, additional_response_selection_parameters=None): search_results = self.search_algorithm.search(input_statement) @@ -40,15 +41,19 @@ def process(self, input_statement, additional_response_selection_parameters=None closest_match.text, input_statement.text, closest_match.confidence )) - recent_repeated_responses = filters.get_recent_repeated_responses( - self.chatbot, - input_statement.conversation - ) + recent_repeated_responses = None - for index, recent_repeated_response in enumerate(recent_repeated_responses): - self.chatbot.logger.info('{}. Excluding recent repeated response of "{}"'.format( - index, recent_repeated_response - )) + # remove recently repeated only if user specifies + if self.exclude_recent_repeated: + recent_repeated_responses = filters.get_recent_repeated_responses( + self.chatbot, + input_statement.conversation + ) + + for index, recent_repeated_response in enumerate(recent_repeated_responses): + self.chatbot.logger.info('{}. 
Excluding recent repeated response of "{}"'.format( + index, recent_repeated_response + )) response_selection_parameters = { 'search_in_response_to': closest_match.search_text, diff --git a/chatterbot/singleton_classes.py b/chatterbot/singleton_classes.py new file mode 100644 index 000000000..a06ca0659 --- /dev/null +++ b/chatterbot/singleton_classes.py @@ -0,0 +1,16 @@ +from chatterbot import languages +import spacy + +# loading spacy more than once slows down everything and makes it consume a lot of extra memory +# so having a single instance will save memory +class singleSpacy: + _instance = None + @staticmethod + def getInstance(language=None): + if singleSpacy._instance is None: + singleSpacy(language) + return singleSpacy._instance + + def __init__(self, language=None): + language = language or languages.ENG + singleSpacy._instance = spacy.load(language.ISO_639_1.lower()) diff --git a/chatterbot/storage/mongodb.py b/chatterbot/storage/mongodb.py index 5e1256fdb..4062153cd 100644 --- a/chatterbot/storage/mongodb.py +++ b/chatterbot/storage/mongodb.py @@ -124,6 +124,8 @@ def filter(self, **kwargs): or_regex = '|'.join([ '{}'.format(re.escape(word)) for word in search_text_contains.split(' ') ]) + # try matching whole words rather than part; for example 'hi' shouldn't match 'white' + or_regex = '\\b' + or_regex + '\\b' kwargs['search_text'] = re.compile(or_regex) mongo_ordering = [] diff --git a/chatterbot/tagging.py b/chatterbot/tagging.py index ea3c31d26..21e335e9f 100644 --- a/chatterbot/tagging.py +++ b/chatterbot/tagging.py @@ -1,6 +1,6 @@ import string from chatterbot import languages - +from chatterbot import singleton_classes class LowercaseTagger(object): """ @@ -17,13 +17,12 @@ def get_text_index_string(self, text): class PosLemmaTagger(object): def __init__(self, language=None): - import spacy self.language = language or languages.ENG self.punctuation_table = str.maketrans(dict.fromkeys(string.punctuation)) - self.nlp = 
spacy.load(self.language.ISO_639_1.lower()) + self.nlp = singleton_classes.singleSpacy.getInstance(language) def get_text_index_string(self, text): """ diff --git a/install.sh b/install.sh new file mode 100644 index 000000000..fbf99b856 --- /dev/null +++ b/install.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# may need to install/update Cython +# python3 -m pip install -U Cython + +if + # for arm architecture, need to set BLIS to generic for install to work + lscpu| head -n 1 | grep -q 'aarch\|arm'; then + echo "Setting BLIS_ARCH to generic" + export BLIS_ARCH='generic' +fi +echo "Installing Chatterbot" +python3 -m pip install . + +# change 'en' to any other language if desired; by default the small model is downloaded +# but this can be changed; for example: change 'en' to 'en_core_web_lg' for the large model +# then will need to create corresponding shortcut link; for example: python3 -m spacy link --force en_core_web_lg en +echo "Downloading and linking spaCy en model" +python3 -m spacy download en diff --git a/requirements.txt b/requirements.txt index 4a000abd0..6f0d257ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,6 @@ nltk>=3.2,<4.0 pint>=0.8.1 python-dateutil>=2.7,<2.8 pyyaml>=5.1,<5.2 -spacy>=2.1,<2.2 +spacy>=2.2,<2.3 sqlalchemy>=1.3,<1.4 pytz diff --git a/setup.cfg b/setup.cfg index 57b63e266..3e6ca1aeb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ max_line_length = 175 exclude = .eggs, .git, .tox, build, [chatterbot] -version = 1.1.0 +version = 1.1.1 author = Gunther Cox email = gunthercx@gmail.com url = https://github.com/gunthercox/ChatterBot diff --git a/setup.py b/setup.py index e4039b30e..442757fe7 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,6 @@ else: REQUIREMENTS.append(requirement) - setup( name='ChatterBot', version=VERSION, @@ -71,7 +70,7 @@ dependency_links=DEPENDENCIES, python_requires='>=3.4, <4', license='BSD', - zip_safe=True, + zip_safe=False, platforms=['any'], keywords=['ChatterBot', 'chatbot', 'chat', 'bot'], 
classifiers=[