From fb41e4f7c5cc81fe2e065e840e93780d2027d4ff Mon Sep 17 00:00:00 2001 From: Thomas Kline Date: Mon, 9 Aug 2021 11:33:06 -0400 Subject: [PATCH 1/4] Initializing project --- .idea/.gitignore | 8 +++++ .idea/aws.xml | 17 ++++++++++ .idea/flask-ner.iml | 15 ++++++++ .../inspectionProfiles/profiles_settings.xml | 6 ++++ .idea/misc.xml | 4 +++ .idea/modules.xml | 8 +++++ flaskner.egg-info/PKG-INFO | 10 ++++++ flaskner.egg-info/SOURCES.txt | 5 +++ flaskner.egg-info/dependency_links.txt | 1 + flaskner.egg-info/top_level.txt | 1 + ner_client.py | 10 ++++++ setup.py | 8 +++++ ...est_ner_client.cpython-38-pytest-6.2.4.pyc | Bin 0 -> 760 bytes test/test_doubles.py | 24 +++++++++++++ test/test_ner_client.py | 32 ++++++++++++++++++ 15 files changed, 149 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/aws.xml create mode 100644 .idea/flask-ner.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 flaskner.egg-info/PKG-INFO create mode 100644 flaskner.egg-info/SOURCES.txt create mode 100644 flaskner.egg-info/dependency_links.txt create mode 100644 flaskner.egg-info/top_level.txt create mode 100644 ner_client.py create mode 100644 setup.py create mode 100644 test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc create mode 100644 test/test_doubles.py create mode 100644 test/test_ner_client.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/aws.xml b/.idea/aws.xml new file mode 100644 index 0000000..1850186 --- /dev/null +++ b/.idea/aws.xml @@ -0,0 +1,17 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/flask-ner.iml b/.idea/flask-ner.iml new file mode 100644 index 0000000..8c8b8cf --- /dev/null +++ b/.idea/flask-ner.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..fca3432 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..25df3e9 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/flaskner.egg-info/PKG-INFO b/flaskner.egg-info/PKG-INFO new file mode 100644 index 0000000..a98d6c8 --- /dev/null +++ b/flaskner.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: flaskner +Version: 0.0.1 +Summary: A simple NER API +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN + +UNKNOWN + diff --git a/flaskner.egg-info/SOURCES.txt b/flaskner.egg-info/SOURCES.txt new file mode 100644 index 0000000..79cc34b --- /dev/null +++ b/flaskner.egg-info/SOURCES.txt @@ -0,0 +1,5 @@ +setup.py +flaskner.egg-info/PKG-INFO +flaskner.egg-info/SOURCES.txt +flaskner.egg-info/dependency_links.txt +flaskner.egg-info/top_level.txt \ No newline at end of file diff --git a/flaskner.egg-info/dependency_links.txt b/flaskner.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/flaskner.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/flaskner.egg-info/top_level.txt b/flaskner.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/flaskner.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/ner_client.py b/ner_client.py new file mode 100644 index 0000000..609e39a --- /dev/null +++ b/ner_client.py @@ -0,0 +1,10 @@ +import spacy + +class NamedEntityClient: + def __init__(self, model): + self.model = model + + def get_ents(self, sentence): + doc = self.model(sentence) + entities = [{ 'ent': ent.text, 'label': ent.label_ } for ent in doc.ents] + return { 'ents': entities, 'html': ''} \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f7937b4 --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +from distutils.core import setup +from setuptools import find_packages + +setup( + name="flaskner", + version="0.0.1", + description="A simple NER API" +) \ No newline at end of file diff --git a/test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc b/test/__pycache__/test_ner_client.cpython-38-pytest-6.2.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a51f9d4f56500b3bcde4f572ed92e78cfe96bcc GIT binary patch literal 760 zcmb7CJ#P~+81~0CaiT^lhy}4AwhZJXCWZ<{3J7({Lc4gej9L(70HVTHEfLeo!Vp8bYsvMv!1jR9H03eNycpII_ZHM=L0 zYmSj=c5Xsk8^5Zoaj}FZ_r_s|4X$8?mjIF6kz2Z@JF4jey=7anV_O#a1H%>}9a|^J zDW9y1+UO51+Sm<@`KhC1*(i=QbkL8s&vGpz} ztZ6r{@SVD{-;GnIZepjbYuY%$fw459BRZu&mj7t9`2U1!5LzhZKo+IaRH=HS+Y0x` zN_}gKYB=!%O7=4!z!-galXw*YH{z)0^GOn)TU$lzLONbHo!YbKs_C%H#h^9Jx$x%3 z+i22c?@HrG-xPxTp$eZw>uf}l#5v%q0O_&lK}V>J$F`C<9R_)a&!$&Qkf%iOzmQna A8~^|S literal 0 HcmV?d00001 diff --git a/test/test_doubles.py b/test/test_doubles.py new file mode 100644 index 0000000..cc2f605 --- /dev/null +++ b/test/test_doubles.py @@ -0,0 +1,24 @@ +class NerModelTestDouble: + def __init__(self, model): + self.model = model + + def returns_doc_ents(self, ents): + self.ents = ents + + def __call__(self, sent): + return DocTestDouble(sent, self.ents) + +class SpanTestDouble: + def __init__(self, text, label): + self.text = text + self.label_ = label + +class DocTestDouble: + def __init__(self, sent, ents): + self.ents = [SpanTestDouble(ent['text'], ent['label_']) for ent in ents] + + def patch_method(self, attr, return_value): + def patched(): return return_value + setattr(self, attr, patched) + return self + diff --git a/test/test_ner_client.py b/test/test_ner_client.py new file mode 100644 index 0000000..a036893 --- /dev/null +++ b/test/test_ner_client.py @@ -0,0 +1,32 @@ +import unittest +from ner_client import NamedEntityClient +from test_doubles import NerModelTestDouble + + +class TestNerClient(unittest.TestCase): + + # { ents: [{...}], + # html: "..."} + + def test_get_ents_returns_dictionary_given_empty_string_causes_empty_spacy_doc_ents(self): + model = NerModelTestDouble('eng') + model.returns_doc_ents([]) + ner = NamedEntityClient(model) + ents = ner.get_ents("") + self.assertIsInstance(ents, dict) + + def test_get_ents_returns_dictionary_given_nonempty_string_causes_empty_spacy_doc_ents(self): + model = NerModelTestDouble('eng') + model.returns_doc_ents([]) + ner = NamedEntityClient(model) + ents = ner.get_ents("Madison is a city in Wisconsin") + self.assertIsInstance(ents, dict) + + def test_get_ents_given_spacy_PERSON_is_returned_serializes_to_Person(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Laurent Fressinet', 'label_': 'PERSON'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Laurent Fressinet', 'label': 'Person'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) \ No newline at end of file From 7cbe3e92031ac0ad19e37e04982fdef46b6ffe23 Mon Sep 17 00:00:00 2001 From: Thomas Kline Date: Mon, 9 Aug 2021 11:46:37 -0400 Subject: [PATCH 2/4] Added additional unit tests --- .idea/vcs.xml | 6 ++++++ ner_client.py | 13 +++++++++++-- test/test_ner_client.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ner_client.py b/ner_client.py index 609e39a..c257a4a 100644 --- a/ner_client.py +++ b/ner_client.py @@ -6,5 +6,14 @@ def __init__(self, model): def get_ents(self, sentence): doc = self.model(sentence) - entities = [{ 'ent': ent.text, 'label': ent.label_ } for ent in doc.ents] - return { 'ents': entities, 'html': ''} \ No newline at end of file + entities = [{ 'ent': ent.text, 'label': self.map_label(ent.label_) } for ent in doc.ents] + return { 'ents': entities, 'html': ''} + + @staticmethod + def map_label(label): + label_map = { + 'PERSON': 'Person', + 'NORP': 'Group', + 'LOC': 'Location' + } + return label_map.get(label) \ No newline at end of file diff --git a/test/test_ner_client.py b/test/test_ner_client.py index a036893..db94a94 100644 --- a/test/test_ner_client.py +++ b/test/test_ner_client.py @@ -29,4 +29,22 @@ def test_get_ents_given_spacy_PERSON_is_returned_serializes_to_Person(self): ner = NamedEntityClient(model) result = ner.get_ents('...') expected_result = { "ents": [{'ent': 'Laurent Fressinet', 'label': 'Person'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_NORP_is_returned_serializes_to_Group(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Lithuanian', 'label_': 'NORP'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Lithuanian', 'label': 'Group'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_LOC_is_returned_serializes_to_Location(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'the ocean', 'label_': 'LOC'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'the ocean', 'label': 'Location'}], 'html': ""} self.assertListEqual(result['ents'], expected_result['ents']) \ No newline at end of file From 43563e3336b37bb2e852078a9ed2761a151b39b8 Mon Sep 17 00:00:00 2001 From: Thomas Kline Date: Tue, 10 Aug 2021 12:55:23 -0400 Subject: [PATCH 3/4] adding e2e tests --- app.py | 10 ++++ ner_client.py | 4 +- templates/index.html | 14 +++++ test/geckodriver.log | 125 ++++++++++++++++++++++++++++++++++++++++ test/test_index_e2e.py | 30 ++++++++++ test/test_ner_client.py | 28 +++++++++ 6 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 app.py create mode 100644 templates/index.html create mode 100644 test/geckodriver.log create mode 100644 test/test_index_e2e.py diff --git a/app.py b/app.py new file mode 100644 index 0000000..56de837 --- /dev/null +++ b/app.py @@ -0,0 +1,10 @@ +from flask import Flask, render_template, request + +app = Flask(__name__) + +@app.route('/') +def index(): + return render_template('index.html') + +if __name__ == '__main__': + app.run(debug=True) \ No newline at end of file diff --git a/ner_client.py b/ner_client.py index c257a4a..1ec0287 100644 --- a/ner_client.py +++ b/ner_client.py @@ -14,6 +14,8 @@ def map_label(label): label_map = { 'PERSON': 'Person', 'NORP': 'Group', - 'LOC': 'Location' + 'LOC': 'Location', + 'GPE': 'Location', + 'LANGUAGE': 'Language' } return label_map.get(label) \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..adcc9b9 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,14 @@ + + + + + Named Entity Finder + + +

+ Named Entity Finder +

+ + + + \ No newline at end of file diff --git a/test/geckodriver.log b/test/geckodriver.log new file mode 100644 index 0000000..08525a0 --- /dev/null +++ b/test/geckodriver.log @@ -0,0 +1,125 @@ +1628545153397 geckodriver INFO Listening on 127.0.0.1:61906 +1628545153429 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileDzoXLR" +1628545158226 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileDzoXLR/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628545159935 Marionette INFO Listening on port 61915 +1628545160035 Marionette WARN TLS certificate errors will be ignored for this session +console.error: services.settings: + main/doh-providers Signature failed InvalidSignatureError: Invalid content signature (main/doh-providers) +console.error: services.settings: + main/doh-providers local data was corrupted +console.warn: services.settings: main/doh-providers Signature verified failed. Retry from scratch +console.error: services.settings: + main/doh-config Signature failed InvalidSignatureError: Invalid content signature (main/doh-config) +console.error: services.settings: + main/doh-config local data was corrupted +console.warn: services.settings: main/doh-config Signature verified failed. Retry from scratch +console.log: WebExtensions: reset-default-search: starting. +console.log: WebExtensions: reset-default-search: No addons in our list are installed. +1628613026021 geckodriver INFO Listening on 127.0.0.1:64876 +1628613027052 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileYMY1ax" +1628613028446 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileYMY1ax/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613030074 Marionette INFO Listening on port 64887 +1628613030160 Marionette WARN TLS certificate errors will be ignored for this session +1628613030242 Marionette INFO Stopped listening on port 64887 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613348978 geckodriver INFO Listening on 127.0.0.1:64976 +1628613349992 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileVRTOpq" +1628613350519 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileVRTOpq/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613351914 Marionette INFO Listening on port 64987 +1628613351962 Marionette WARN TLS certificate errors will be ignored for this session +1628613352030 Marionette INFO Stopped listening on port 64987 +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. +1628613450229 geckodriver INFO Listening on 127.0.0.1:65038 +1628613450241 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile64kM1D" +1628613450561 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile64kM1D/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613451917 Marionette INFO Listening on port 65047 +1628613452008 Marionette WARN TLS certificate errors will be ignored for this session +1628613452071 Marionette INFO Stopped listening on port 65047 +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613627119 geckodriver INFO Listening on 127.0.0.1:65103 +1628613627127 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileCc9oYw" +1628613627700 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileCc9oYw/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613629251 Marionette INFO Listening on port 65112 +1628613629301 Marionette WARN TLS certificate errors will be ignored for this session +1628613629370 Marionette INFO Stopped listening on port 65112 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613643611 geckodriver INFO Listening on 127.0.0.1:65160 +1628613643623 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileoybbyR" +1628613643959 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileoybbyR/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613645266 Marionette INFO Listening on port 65169 +1628613645284 Marionette WARN TLS certificate errors will be ignored for this session +1628613645357 Marionette INFO Stopped listening on port 65169 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628613836574 geckodriver INFO Listening on 127.0.0.1:65229 +1628613837588 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile5MBxxz" +1628613838086 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile5MBxxz/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628613839412 Marionette INFO Listening on port 65240 +1628613839460 Marionette WARN TLS certificate errors will be ignored for this session +1628613839530 Marionette INFO Stopped listening on port 65240 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614476292 geckodriver INFO Listening on 127.0.0.1:65454 +1628614477308 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofilejoVBxm" +1628614477873 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofilejoVBxm/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614479426 Marionette INFO Listening on port 65465 +1628614479480 Marionette WARN TLS certificate errors will be ignored for this session +1628614479575 Marionette INFO Stopped listening on port 65465 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown +console.error: services.settings: + main/message-groups Signature failed TypeError: NetworkError when attempting to fetch resource. + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614492073 geckodriver INFO Listening on 127.0.0.1:65513 +1628614492083 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileGtd7ye" +1628614492375 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileGtd7ye/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614493726 Marionette INFO Listening on port 65522 +1628614493755 Marionette WARN TLS certificate errors will be ignored for this session +1628614493824 Marionette INFO Stopped listening on port 65522 +console.warn: services.settings: main/partitioning-exempt-urls sync interrupted by shutdown + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + diff --git a/test/test_index_e2e.py b/test/test_index_e2e.py new file mode 100644 index 0000000..6ee6834 --- /dev/null +++ b/test/test_index_e2e.py @@ -0,0 +1,30 @@ +import unittest +from selenium import webdriver + +class E2ETests(unittest.TestCase): + + def setUp(self): + self.driver = webdriver.Firefox(executable_path=r'/Users/thomaskline/Applications/geckodriver') + self.driver.get('http://localhost:5000') + + def tearDown(self): + self.driver.quit() + + def test_browser_title_contains_app_name(self): + self.assertIn('Named Entity', self.driver.title) + + def test_page_heading_is_named_entity_finder(self): + heading = self._find("heading").text + self.assertEqual('Named Entity Finder', heading) + + def test_page_has_input_for_text(self): + input_element = self._find('input-text') + self.assertIsNotNone(input_element) + + def test_page_has_button_for_submitting_text(self): + submit_button = self._find('find-button') + self.assertIsNotNone(submit_button) + + + def _find(self, val): + return self.driver.find_element_by_css_selector(f'[data-test-id="{val}"]') diff --git a/test/test_ner_client.py b/test/test_ner_client.py index db94a94..ddc4221 100644 --- a/test/test_ner_client.py +++ b/test/test_ner_client.py @@ -47,4 +47,32 @@ def test_get_ents_given_spacy_LOC_is_returned_serializes_to_Location(self): ner = NamedEntityClient(model) result = ner.get_ents('...') expected_result = { "ents": [{'ent': 'the ocean', 'label': 'Location'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_LANGUAGE_is_returned_serializes_to_Language(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'ASL', 'label_': 'LANGUAGE'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'ASL', 'label': 'Language'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_spacy_GPE_is_returned_serializes_to_Location(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Australia', 'label_': 'GPE'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": [{'ent': 'Australia', 'label': 'Location'}], 'html': ""} + self.assertListEqual(result['ents'], expected_result['ents']) + + def test_get_ents_given_multiple_ents_serializes_all(self): + model = NerModelTestDouble('eng') + doc_ents = [{'text': 'Australia', 'label_': 'GPE'}, {'text': 'Judith Polgar', 'label_': 'PERSON'}] + model.returns_doc_ents(doc_ents) + ner = NamedEntityClient(model) + result = ner.get_ents('...') + expected_result = { "ents": + [{'ent': 'Australia', 'label': 'Location'}, {'ent': 'Judith Polgar', 'label': 'Person'}], 'html': ""} self.assertListEqual(result['ents'], expected_result['ents']) \ No newline at end of file From 50d105e3f9cc553ae85c137d466a6af013b4d474 Mon Sep 17 00:00:00 2001 From: Thomas Kline Date: Wed, 11 Aug 2021 08:54:23 -0400 Subject: [PATCH 4/4] Added cors support --- __pycache__/ner_client.cpython-38.pyc | Bin 0 -> 1197 bytes app.py | 18 +++++++- ner_client.py | 17 +++---- static/ner.js | 63 ++++++++++++++++++++++++++ static/site.css | 3 ++ templates/index.html | 62 ++++++++++++++++++++----- test/geckodriver.log | 24 ++++++++++ test/test_api.py | 19 ++++++++ test/test_index_e2e.py | 8 ++++ 9 files changed, 193 insertions(+), 21 deletions(-) create mode 100644 __pycache__/ner_client.cpython-38.pyc create mode 100644 static/ner.js create mode 100644 static/site.css create mode 100644 test/test_api.py diff --git a/__pycache__/ner_client.cpython-38.pyc b/__pycache__/ner_client.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b18d0bff2124f7865e85c562a43a571afe3ed88a GIT binary patch literal 1197 zcmZuwy-yo46t{gJm=0KB$4zSQ@33~;vnv4Yl6_+*Evf z`D|l7O-vEz!_me(d{+SZz}ji^K0tj{2!UWWLAR~2Bn0cVxP$~rQSHBPGKU`mQanYz5kV5@87v&2}j!KMjq&?WlxK^kYy z58_*m;wl8d=?1c3wt*fGaRyq+6|IT#yD(;XX1$Ki$0N&kpx+_^1K)|V{y6HZz_Wh0 zy|=#`S~uL?>sn`Lcgs4RZX0|y!_L8Grwvp)Pz?};57y4tbO1kfgE(4MgL(l@DCQ9; z;xPK}_s8d_FYfbj!*CeP;e2QC3Wcx@A>bvAQ#>ySaWan5`X4<9QS~!WcEJ?TNIX>V ogk+WXPA#_u;b@{%U~|e{?lY><5(f4D&^61~>jE~=^A`yD3rZ>#XaE2J literal 0 HcmV?d00001 diff --git a/app.py b/app.py index 56de837..8711c05 100644 --- a/app.py +++ b/app.py @@ -1,10 +1,26 @@ from flask import Flask, render_template, request +import json +import spacy +from spacy import displacy +from ner_client import NamedEntityClient +from flask_cors import CORS app = Flask(__name__) +CORS(app) + +ner = spacy.load("en_core_web_sm") +ner = NamedEntityClient(ner, displacy) @app.route('/') def index(): return render_template('index.html') -if __name__ == '__main__': +@app.route('/ner', methods=['POST']) +def get_named_ents(): + data = request.get_json() + result = ner.get_ents(data['sentence']) + response = { "entities": result.get('ents'), "html": result.get('html') } + return json.dumps(response) + +if __name__ == "__main__": app.run(debug=True) \ No newline at end of file diff --git a/ner_client.py b/ner_client.py index 1ec0287..1ddae23 100644 --- a/ner_client.py +++ b/ner_client.py @@ -1,21 +1,22 @@ -import spacy - class NamedEntityClient: - def __init__(self, model): + def __init__(self, model, displacy): self.model = model + self.displacy = displacy def get_ents(self, sentence): doc = self.model(sentence) + html = self.displacy.render(doc, style="ent") entities = [{ 'ent': ent.text, 'label': self.map_label(ent.label_) } for ent in doc.ents] - return { 'ents': entities, 'html': ''} + return { 'ents': entities, 'html': html } @staticmethod def map_label(label): label_map = { - 'PERSON': 'Person', - 'NORP': 'Group', - 'LOC': 'Location', - 'GPE': 'Location', + 'PERSON' : 'Person', + 'NORP' : 'Group', + 'LOC' : 'Location', + 'GPE' : 'Location', 'LANGUAGE': 'Language' } + return label_map.get(label) \ No newline at end of file diff --git a/static/ner.js b/static/ner.js new file mode 100644 index 0000000..f201553 --- /dev/null +++ b/static/ner.js @@ -0,0 +1,63 @@ +const fetchResult = (sent) => { + fetch('http://localhost:5000/ner', { + method: 'POST', + headers: new Headers({ 'content-type': 'application/json' }), + cache: 'no-cache', + body: JSON.stringify({ 'sentence': sent }) + }).then(async (response) => { + if (response.ok) { + let result = await response.json(); + let labeledDoc = document.getElementById('labeled-doc') + labeledDoc.innerHTML = result.html; + return buildNerTable(result.entities); + } + return Promise.reject(response); + }).catch((error) => { + console.error('Something went wrong.', error); + }); +} + +const buildNerTable = (results) => { + let table = document.getElementById('ner-table'); + + results.forEach(result => { + let row = document.createElement('tr'); + row.classList.add('ner-row'); + + let colName = document.createElement('td'); + colName.textContent = result.ent; + + let colType = document.createElement('td'); + colType.textContent = result.label; + + row.appendChild(colName); + row.appendChild(colType); + + table.appendChild(row); + }) +} + +const cleanTable = () => { + let rows = document.querySelectorAll('.ner-row'); + rows.forEach(row => { + row.remove(); + }) +} + +const updateResults = async (userInput) => { + cleanTable(); + await fetchResult(userInput.value); +} + +const init = async () => { + const submitButton = document.getElementById('find-button'); + const userInput = document.getElementById('input-text'); + + submitButton.addEventListener('click', async (e) => { + await updateResults(userInput); + }) +} + +window.onload = (event) => { + init(); +} diff --git a/static/site.css b/static/site.css new file mode 100644 index 0000000..4babbed --- /dev/null +++ b/static/site.css @@ -0,0 +1,3 @@ +#app { + max-width: 800px; +} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index adcc9b9..0fd7902 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,14 +1,52 @@ - - - - + + Named Entity Finder - - -

- Named Entity Finder -

- - - + + + + +
+ +
+

Named Entity Finder

+
+ +
+
+
+
+ +
+
+
+ +
+ +
+
+ +
+ + + + + + + +
+ Entity + + Type +
+
+
+ + \ No newline at end of file diff --git a/test/geckodriver.log b/test/geckodriver.log index 08525a0..072dfcd 100644 --- a/test/geckodriver.log +++ b/test/geckodriver.log @@ -123,3 +123,27 @@ console.warn: services.settings: main/partitioning-exempt-urls sync interrupted ###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost +1628614698682 geckodriver INFO Listening on 127.0.0.1:49202 +1628614698690 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileNp6ShI" +1628614699149 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofileNp6ShI/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614700490 Marionette INFO Listening on port 49211 +1628614700576 Marionette WARN TLS certificate errors will be ignored for this session +1628614700883 Marionette INFO Stopped listening on port 49211 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1628614938758 geckodriver INFO Listening on 127.0.0.1:49281 +1628614939774 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "--marionette" "-foreground" "-no-remote" "-profile" "/var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile631HCJ" +1628614940331 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at /var/folders/4h/kktp20rn5hl_6tc9c8593lj00000gn/T/rust_mozprofile631HCJ/search.json.mozlz4", (void 0))) +console.error: BackgroundUpdate: + _reasonsToNotScheduleUpdates: Failed to check for Maintenance Service Registry Key: [Exception... "Component returned failure code: 0x80004001 (NS_ERROR_NOT_IMPLEMENTED) [nsIUpdateProcessor.getServiceRegKeyExists]" nsresult: "0x80004001 (NS_ERROR_NOT_IMPLEMENTED)" location: "JS frame :: resource://gre/modules/BackgroundUpdate.jsm :: _reasonsToNotScheduleUpdates :: line 243" data: no] +1628614941603 Marionette INFO Listening on port 49292 +1628614941677 Marionette WARN TLS certificate errors will be ignored for this session +1628614941998 Marionette INFO Stopped listening on port 49292 + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..0c25cdd --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,19 @@ +import unittest +import json +from flask import request + +from app import app + +class TestApi(unittest.TestCase): + + def test_ner_endpoint_given_json_body_returns_200(self): + with app.test_client() as client: + response = client.post('/ner', json={"sentence": "Steve Malkmu is in a good band."}) + assert response._status_code == 200 + + def test_ner_endpoint_given_json_body_with_known_entities_returns_entity_result_in_response(self): + with app.test_client() as client: + response = client.post('/ner', json={"sentence": "Kamala Harris"}) + data = json.loads(response.get_data()) + assert data['entities'][0]['ent'] == 'Kamala Harris' + assert data['entities'][0]['label'] == 'Person' \ No newline at end of file diff --git a/test/test_index_e2e.py b/test/test_index_e2e.py index 6ee6834..26faa65 100644 --- a/test/test_index_e2e.py +++ b/test/test_index_e2e.py @@ -25,6 +25,14 @@ def test_page_has_button_for_submitting_text(self): submit_button = self._find('find-button') self.assertIsNotNone(submit_button) + def test_page_has_ner_table(self): + input_element = self._find('input-text') + submit_button = self._find('find-button') + input_element.send_keys('France and Germany share a border in Europe') + submit_button.click() + table = self._find('ner-table') + self.assertIsNotNone(table) + def _find(self, val): return self.driver.find_element_by_css_selector(f'[data-test-id="{val}"]')