diff --git a/.gitignore b/.gitignore deleted file mode 100644 index b6e4761..0000000 --- a/.gitignore +++ /dev/null @@ -1,129 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/aws.xml b/.idea/aws.xml new file mode 100644 index 0000000..1850186 --- /dev/null +++ b/.idea/aws.xml @@ -0,0 +1,17 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/flask-ner.iml b/.idea/flask-ner.iml new file mode 100644 index 0000000..8c8b8cf --- /dev/null +++ b/.idea/flask-ner.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..fca3432 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..25df3e9 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index e5a8b0a..0000000 --- a/README.md +++ /dev/null @@ -1 +0,0 @@ -# tdd-python \ No newline at end of file diff --git a/__pycache__/ner_client.cpython-38.pyc b/__pycache__/ner_client.cpython-38.pyc new file mode 100644 index 0000000..b18d0bf Binary files /dev/null and b/__pycache__/ner_client.cpython-38.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000..8711c05 --- /dev/null +++ b/app.py @@ -0,0 +1,26 @@ +from flask import Flask, render_template, request +import json +import spacy +from spacy import displacy +from ner_client import NamedEntityClient +from flask_cors import CORS + +app = Flask(__name__) +CORS(app) + +ner = spacy.load("en_core_web_sm") +ner = NamedEntityClient(ner, displacy) + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/ner', methods=['POST']) +def get_named_ents(): + data = request.get_json() + result = ner.get_ents(data['sentence']) + response = { "entities": result.get('ents'), "html": result.get('html') } + return json.dumps(response) + +if __name__ == "__main__": + app.run(debug=True) \ No newline at end of file diff --git a/flaskner.egg-info/PKG-INFO b/flaskner.egg-info/PKG-INFO new file mode 100644 index 0000000..a98d6c8 --- /dev/null +++ b/flaskner.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: flaskner +Version: 0.0.1 +Summary: A simple NER API +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN + +UNKNOWN + diff --git a/flaskner.egg-info/SOURCES.txt b/flaskner.egg-info/SOURCES.txt new file mode 100644 index 0000000..79cc34b --- /dev/null +++ b/flaskner.egg-info/SOURCES.txt @@ -0,0 +1,5 @@ +setup.py +flaskner.egg-info/PKG-INFO +flaskner.egg-info/SOURCES.txt +flaskner.egg-info/dependency_links.txt +flaskner.egg-info/top_level.txt \ No newline at end of file diff --git a/flaskner.egg-info/dependency_links.txt b/flaskner.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/flaskner.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/flaskner.egg-info/top_level.txt b/flaskner.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/flaskner.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/ner_client.py b/ner_client.py new file mode 100644 index 0000000..1ddae23 --- /dev/null +++ b/ner_client.py @@ -0,0 +1,22 @@ +class NamedEntityClient: + def __init__(self, model, displacy): + self.model = model + self.displacy = displacy + + def get_ents(self, sentence): + doc = self.model(sentence) + html = self.displacy.render(doc, style="ent") + entities = [{ 'ent': ent.text, 'label': self.map_label(ent.label_) } for ent in doc.ents] + return { 'ents': entities, 'html': html } + + @staticmethod + def map_label(label): + label_map = { + 'PERSON' : 'Person', + 'NORP' : 'Group', + 'LOC' : 'Location', + 'GPE' : 'Location', + 'LANGUAGE': 'Language' + } + + return label_map.get(label) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f7937b4 --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +from distutils.core import setup +from setuptools import find_packages + +setup( + name="flaskner", + version="0.0.1", + description="A simple NER API" +) \ No newline at end of file diff --git a/static/ner.js b/static/ner.js new file mode 100644 index 0000000..f201553 --- /dev/null +++ b/static/ner.js @@ -0,0 +1,63 @@ +const fetchResult = (sent) => { + fetch('http://localhost:5000/ner', { + method: 'POST', + headers: new Headers({ 'content-type': 'application/json' }), + cache: 'no-cache', + body: JSON.stringify({ 'sentence': sent }) + }).then(async (response) => { + if (response.ok) { + let result = await response.json(); + let labeledDoc = document.getElementById('labeled-doc') + labeledDoc.innerHTML = result.html; + return buildNerTable(result.entities); + } + return Promise.reject(response); + }).catch((error) => { + console.error('Something went wrong.', error); + }); +} + +const buildNerTable = (results) => { + let table = document.getElementById('ner-table'); + + results.forEach(result => { + let row = document.createElement('tr'); + row.classList.add('ner-row'); + + let colName = document.createElement('td'); + colName.textContent = result.ent; + + let colType = document.createElement('td'); + colType.textContent = result.label; + + row.appendChild(colName); + row.appendChild(colType); + + table.appendChild(row); + }) +} + +const cleanTable = () => { + let rows = document.querySelectorAll('.ner-row'); + rows.forEach(row => { + row.remove(); + }) +} + +const updateResults = async (userInput) => { + cleanTable(); + await fetchResult(userInput.value); +} + +const init = async () => { + const submitButton = document.getElementById('find-button'); + const userInput = document.getElementById('input-text'); + + submitButton.addEventListener('click', async (e) => { + await updateResults(userInput); + }) +} + +window.onload = (event) => { + init(); +} diff --git a/static/site.css b/static/site.css new file mode 100644 index 0000000..4babbed --- /dev/null +++ b/static/site.css @@ -0,0 +1,3 @@ +#app { + max-width: 800px; +} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..0fd7902 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,52 @@ + + + Named Entity Finder + + + + +
+ +
+

Named Entity Finder

+
+ +
+
+
+
+ +
+
+
+ +
+ +
+
+ +
+ + + + + + + +
+ Entity + + Type +
+
+
+ + + \ No newline at end of file diff --git a/test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc b/test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc new file mode 100644 index 0000000..2a51f9d Binary files /dev/null and b/test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc differ diff --git a/test/geckodriver.log b/test/geckodriver.log new file mode 100644 index 0000000..072dfcd --- /dev/null +++ b/test/geckodriver.log @@ -0,0 +1,149 @@ +1628545153397 geckodriver INFO Listening on 127.0.0.1:61906 +1628545153429 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileDzoXLR" +1628545158226 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileDzoXLR/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628545159935 Marionette INFO Listening on port 61915 +1628545160035 Marionette WARN TLS certificate errors will be ignored for this session +console.error: services.settings: + main/doh-providers Signature failed InvalidSignatureError: Invalid content signature (main/doh-providers) +console.error: services.settings: + main/doh-providers local data was corrupted +console.warn: services.settings: main/doh-providers Signature verified failed. Retry from scratch +console.error: services.settings: + main/doh-config Signature failed InvalidSignatureError: Invalid content signature (main/doh-config) +console.error: services.settings: + main/doh-config local data was corrupted +console.warn: services.settings: main/doh-config Signature verified failed. Retry from scratch +console.log: WebExtensions: reset-default-search: starting. +console.log: WebExtensions: reset-default-search: No addons in our list are installed. +1628613026021 geckodriver INFO Listening on 127.0.0.1:64876 +1628613027052 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileYMY1ax" +1628613028446 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileYMY1ax/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613030074 Marionette INFO Listening on port 64887 +1628613030160 Marionette WARN TLS certificate errors will be ignored for this session +1628613030242 Marionette INFO Stopped listening on port 64887 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613348978 geckodriver INFO Listening on 127.0.0.1:64976 +1628613349992 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileVRTOpq" +1628613350519 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileVRTOpq/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613351914 Marionette INFO Listening on port 64987 +1628613351962 Marionette WARN TLS certificate errors will be ignored for this session +1628613352030 Marionette INFO Stopped listening on port 64987 +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. +1628613450229 geckodriver INFO Listening on 127.0.0.1:65038 +1628613450241 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile64kM1D" +1628613450561 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile64kM1D/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613451917 Marionette INFO Listening on port 65047 +1628613452008 Marionette WARN TLS certificate errors will be ignored for this session +1628613452071 Marionette INFO Stopped listening on port 65047 +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613627119 geckodriver INFO Listening on 127.0.0.1:65103 +1628613627127 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileCc9oYw" +1628613627700 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileCc9oYw/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613629251 Marionette INFO Listening on port 65112 +1628613629301 Marionette WARN TLS certificate errors will be ignored for this session +1628613629370 Marionette INFO Stopped listening on port 65112 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613643611 geckodriver INFO Listening on 127.0.0.1:65160 +1628613643623 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileoybbyR" +1628613643959 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileoybbyR/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613645266 Marionette INFO Listening on port 65169 +1628613645284 Marionette WARN TLS certificate errors will be ignored for this session +1628613645357 Marionette INFO Stopped listening on port 65169 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613836574 geckodriver INFO Listening on 127.0.0.1:65229 +1628613837588 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile5MBxxz" +1628613838086 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile5MBxxz/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613839412 Marionette INFO Listening on port 65240 +1628613839460 Marionette WARN TLS certificate errors will be ignored for this session +1628613839530 Marionette INFO Stopped listening on port 65240 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614476292 geckodriver INFO Listening on 127.0.0.1:65454 +1628614477308 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofilejoVBxm" +1628614477873 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofilejoVBxm/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614479426 Marionette INFO Listening on port 65465 +1628614479480 Marionette WARN TLS certificate errors will be ignored for this session +1628614479575 Marionette INFO Stopped listening on port 65465 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614492073 geckodriver INFO Listening on 127.0.0.1:65513 +1628614492083 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileGtd7ye" +1628614492375 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileGtd7ye/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614493726 Marionette INFO Listening on port 65522 +1628614493755 Marionette WARN TLS certificate errors will be ignored for this session +1628614493824 Marionette INFO Stopped listening on port 65522 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614698682 geckodriver INFO Listening on 127.0.0.1:49202 +1628614698690 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileNp6ShI" +1628614699149 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileNp6ShI/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614700490 Marionette INFO Listening on port 49211 +1628614700576 Marionette WARN TLS certificate errors will be ignored for this session +1628614700883 Marionette INFO Stopped listening on port 49211 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614938758 geckodriver INFO Listening on 127.0.0.1:49281 +1628614939774 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile631HCJ" +1628614940331 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile631HCJ/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614941603 Marionette INFO Listening on port 49292 +1628614941677 Marionette WARN TLS certificate errors will be ignored for this session +1628614941998 Marionette INFO Stopped listening on port 49292 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..0c25cdd --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,19 @@ +import unittest +import json +from flask import request + +from app import app + +class TestApi(unittest.TestCase): + + def test_ner_endpoint_given_json_body_returns_200(self): + with app.test_client() as client: + response = client.post('/ner', json={"sentence": "Steve Malkmu is in a good band."}) + assert response._status_code == 200 + + def test_ner_endpoint_given_json_body_with_known_entities_returns_entity_result_in_response(self): + with app.test_client() as client: + response = client.post('/ner', json={"sentence": "Kamala Harris"}) + data = json.loads(response.get_data()) + assert data['entities'][0]['ent'] == 'Kamala Harris' + assert data['entities'][0]['label'] == 'Person' \ No newline at end of file diff --git a/test/test_doubles.py b/test/test_doubles.py new file mode 100644 index 0000000..cc2f605 --- /dev/null +++ b/test/test_doubles.py @@ -0,0 +1,24 @@ +class NerModelTestDouble: + def __init__(self, model): + self.model = model + + def returns_doc_ents(self, ents): + self.ents = ents + + def __call__(self, sent): + return DocTestDouble(sent, self.ents) + +class SpanTestDouble: + def __init__(self, text, label): + self.text = text + self.label_ = label + +class DocTestDouble: + def __init__(self, sent, ents): + self.ents = [SpanTestDouble(ent['text'], ent['label_']) for ent in ents] + + def patch_method(self, attr, return_value): + def patched(): return return_value + setattr(self, attr, patched) + return self + diff --git a/test/test_index_e2e.py b/test/test_index_e2e.py new file mode 100644 index 0000000..26faa65 --- /dev/null +++ b/test/test_index_e2e.py @@ -0,0 +1,38 @@ +import unittest +from selenium import webdriver + +class E2ETests(unittest.TestCase): + + def setUp(self): + self.driver = webdriver.Firefox(executable_path=r'/Users/thomaskline/Applications/geckodriver') + self.driver.get('http://localhost:5000') + + def tearDown(self): + self.driver.quit() + + def test_browser_title_contains_app_name(self): + self.assertIn('Named Entity', self.driver.title) + + def test_page_heading_is_named_entity_finder(self): + heading = self._find("heading").text + self.assertEqual('Named Entity Finder', heading) + + def test_page_has_input_for_text(self): + input_element = self._find('input-text') + self.assertIsNotNone(input_element) + + def test_page_has_button_for_submitting_text(self): + submit_button = self._find('find-button') + self.assertIsNotNone(submit_button) + + def test_page_has_ner_table(self): + input_element = self._find('input-text') + submit_button = self._find('find-button') + input_element.send_keys('France and Germany share a border in Europe') + submit_button.click() + table = self._find('ner-table') + self.assertIsNotNone(table) + + + def _find(self, val): + return self.driver.find_element_by_css_selector(f'[data-test-id="{val}"]') diff --git a/test/test_ner_client.py b/test/test_ner_client.py new file mode 100644 index 0000000..ddc4221 --- /dev/null +++ b/test/test_ner_client.py @@ -0,0 +1,78 @@ +import unittest +from ner_client import NamedEntityClient +from test_doubles import NerModelTestDouble + + +class TestNerClient(unittest.TestCase): + + # { ents: [{...}], + # html: "..."} + + def test_get_ents_returns_dictionary_given_empty_string_causes_empty_spacy_doc_ents(self): + model = NerModelTestDouble('eng') + model.returns_doc_ents([]) + ner = NamedEntityClient(model) + ents = ner.get_ents("") + self.assertIsInstance(ents, dict) + + def test_get_ents_returns_dictionary_given_nonempty_string_causes_empty_spacy_doc_ents(self): + model = NerModelTestDouble('eng') + model.returns_doc_ents([]) + ner = NamedEntityClient(model) + ents = ner.get_ents("Madison is a city in Wisconsin") + self.assertIsInstance(ents, dict) + + def test_get_ents_given_spacy_PERSON_is_returned_serializes_to_Person(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Laurent Fressinet', 'label_': 'PERSON'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Laurent Fressinet', 'label': 'Person'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_NORP_is_returned_serializes_to_Group(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Lithuanian', 'label_': 'NORP'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Lithuanian', 'label': 'Group'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_LOC_is_returned_serializes_to_Location(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'the ocean', 'label_': 'LOC'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'the ocean', 'label': 'Location'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_LANGUAGE_is_returned_serializes_to_Language(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'ASL', 'label_': 'LANGUAGE'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'ASL', 'label': 'Language'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_GPE_is_returned_serializes_to_Location(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Australia', 'label_': 'GPE'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Australia', 'label': 'Location'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_multiple_ents_serializes_all(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Australia', 'label_': 'GPE'}, {'text': 'Judith Polgar', 'label_': 'PERSON'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": + [{'ent': 'Australia', 'label': 'Location'}, {'ent': 'Judith Polgar', 'label': 'Person'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) \ No newline at end of file