from typing import Dict, List

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import RedirectResponse
from gr_nlp_toolkit import Pipeline
from pydantic import BaseModel, Field

app = FastAPI(
    title="The Grεεk NLP API 🇬🇷",
    description="State-of-the-art API for Greek NLP tasks including Greeklish to Greek conversion (G2G), Named Entity Recognition (NER), Part-of-Speech (POS) tagging, and Dependency Parsing (DP). The API is powered by the Grεεk NLP Toolkit ([https://github.com/nlpaueb/gr-nlp-toolkit/](https://github.com/nlpaueb/gr-nlp-toolkit/)), which is also available via PyPI (`pip install gr-nlp-toolkit`).",
    version="1.0.0",
    contact={
        "name": "Natural Language Processing Group - Athens University of Economics and Business (AUEB)",
        "url": "http://nlp.cs.aueb.gr/",
        "api_author": "Lefteris Loukas",
    },
)

# Instantiate the Pipeline
nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")

# Pydantic models for responses
class G2GOutput(BaseModel):
    greek_text: str = Field(
        ...,
        example="η θεσσαλονικη ειναι ωραια πολη",
        description="Converted Greek text",
    )


class NERItem(BaseModel):
    token: str = Field(..., example="αργεντινη")
    ner_value: str = Field(..., example="S-ORG")


class POSItem(BaseModel):
    token: str = Field(..., example="μου")
    upos: str = Field(..., example="PRON")
    morphological_features: Dict[str, str] = Field(
        ...,
        example={
            "Case": "Gen",
            "Gender": "Masc",
            "Number": "Sing",
            "Person": "1",
            "Poss": "_",
            "PronType": "Prs",
        },
    )

class POSResponse(BaseModel):
    pos_results: List[POSItem] = Field(
        ...,
        description="Part-of-Speech tagging information",
        example=[
            {
                "token": "μου",
                "upos": "PRON",
                "morphological_features": {
                    "Case": "Gen",
                    "Gender": "Masc",
                    "Number": "Sing",
                    "Person": "1",
                    "Poss": "_",
                    "PronType": "Prs",
                },
            },
            {
                "token": "αρεσει",
                "upos": "VERB",
                "morphological_features": {
                    "Aspect": "Imp",
                    "Case": "_",
                    "Gender": "_",
                    "Mood": "Ind",
                    "Number": "Sing",
                    "Person": "3",
                    "Tense": "Pres",
                    "VerbForm": "Fin",
                    "Voice": "Act",
                },
            },
            {
                "token": "να",
                "upos": "AUX",
                "morphological_features": {
                    "Aspect": "_",
                    "Mood": "_",
                    "Number": "_",
                    "Person": "_",
                    "Tense": "_",
                    "VerbForm": "_",
                    "Voice": "_",
                },
            },
            {
                "token": "διαβαζω",
                "upos": "VERB",
                "morphological_features": {
                    "Aspect": "Imp",
                    "Case": "_",
                    "Gender": "_",
                    "Mood": "Ind",
                    "Number": "Sing",
                    "Person": "1",
                    "Tense": "Pres",
                    "VerbForm": "Fin",
                    "Voice": "Act",
                },
            },
            {
                "token": "τα",
                "upos": "DET",
                "morphological_features": {
                    "Case": "Acc",
                    "Definite": "Def",
                    "Gender": "Neut",
                    "Number": "Plur",
                    "PronType": "Art",
                },
            },
            {
                "token": "post",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {
                "token": "του",
                "upos": "DET",
                "morphological_features": {
                    "Case": "Gen",
                    "Definite": "Def",
                    "Gender": "Masc",
                    "Number": "Sing",
                    "PronType": "Art",
                },
            },
            {
                "token": "andrew",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {
                "token": "ng",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {"token": "στο", "upos": "_", "morphological_features": {}},
            {
                "token": "twitter",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {"token": ".", "upos": "PUNCT", "morphological_features": {}},
        ],
    )

class DPItem(BaseModel):
    token: str = Field(..., example="προτιμω")
    head: int = Field(..., example=0)
    deprel: str = Field(..., example="root")


class DPResponse(BaseModel):
    dp_results: List[DPItem] = Field(
        ...,
        description="Dependency Parsing information",
        example=[
            {"token": "προτιμω", "head": 0, "deprel": "root"},
            {"token": "την", "head": 4, "deprel": "det"},
            {"token": "πρωινη", "head": 4, "deprel": "amod"},
            {"token": "πτηση", "head": 1, "deprel": "obj"},
            {"token": "απο", "head": 7, "deprel": "case"},
            {"token": "την", "head": 7, "deprel": "det"},
            {"token": "αθηνα", "head": 4, "deprel": "nmod"},
            {"token": "στη", "head": 9, "deprel": "case"},
            {"token": "θεσσαλονικη", "head": 4, "deprel": "nmod"},
            {"token": ".", "head": 1, "deprel": "punct"},
        ],
    )

# API endpoints
# NOTE: "/g2g" follows the naming pattern of the other task endpoints
# (/ner, /pos, /dp); adjust the route path if a different one is preferred.
@app.post("/g2g", response_model=G2GOutput, summary="Greeklish to Greek Conversion")
async def greeklish_to_greek(
    text: str = Query(
        ...,
        description="The Greeklish text to convert",
        example="H thessaloniki einai wraia polh",
    ),
):
    """
    The G2G (Greeklish-to-Greek) endpoint takes Greeklish text (Greek written with Latin characters) as input and transliterates it to Greek text.
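
    As a rough sketch (assuming the API is running locally on the default uvicorn port, 8000, and that the route is registered as "/g2g" as above), the endpoint can be called like this:

    ```python
    import requests

    response = requests.post(
        "http://localhost:8000/g2g",
        params={"text": "H thessaloniki einai wraia polh"},
    )
    print(response.json())  # e.g. {"greek_text": "η θεσσαλονικη ειναι ωραια πολη"}
    ```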
| """ | |
| try: | |
| greek_text = " ".join( | |
| [token.text for token in nlp_pos_ner_dp_with_g2g(text).tokens] | |
| ) | |
| return G2GOutput(greek_text=greek_text) | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=str(e)) | |

class NERResponse(BaseModel):
    ner_results: List[NERItem] = Field(
        ...,
        description="Named Entity Recognition information",
        example=[
            {"token": "η", "ner_value": "O"},
            {"token": "αργεντινη", "ner_value": "S-ORG"},
            {"token": "κερδισε", "ner_value": "O"},
            {"token": "το", "ner_value": "O"},
            {"token": "παγκοσμιο", "ner_value": "B-EVENT"},
            {"token": "κυπελλο", "ner_value": "E-EVENT"},
            {"token": "το", "ner_value": "O"},
            {"token": "2022", "ner_value": "S-DATE"},
        ],
    )
| # @app.post("/ner", response_model=List[NERItem], summary="Named Entity Recognition") | |
| async def process_ner( | |
| text: str = Query( | |
| ..., | |
| description="The text to process for NER", | |
| example="Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022", | |
| ), | |
| ): | |
| """ | |
| The NER endpoint takes Greek text as input and returns a list of dictionaries with the token and the NER value. | |
| Named Entity Recognition (NER) Labels: | |
| ```python | |
| ner_possible_labels = [ | |
| 'O', 'S-GPE', 'S-ORG', 'S-CARDINAL', 'B-ORG', 'E-ORG', 'B-DATE', 'E-DATE', 'S-NORP', | |
| 'B-GPE', 'E-GPE', 'S-EVENT', 'S-DATE', 'S-PRODUCT', 'S-LOC', 'I-ORG', 'S-PERSON', | |
| 'S-ORDINAL', 'B-PERSON', 'I-PERSON', 'E-PERSON', 'B-LAW', 'I-LAW', 'E-LAW', 'B-MONEY', | |
| 'I-MONEY', 'E-MONEY', 'B-EVENT', 'I-EVENT', 'E-EVENT', 'B-FAC', 'E-FAC', 'I-DATE', | |
| 'S-PERCENT', 'B-QUANTITY', 'E-QUANTITY', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'E-WORK_OF_ART', | |
| 'I-FAC', 'S-LAW', 'S-TIME', 'B-LOC', 'E-LOC', 'I-LOC', 'S-FAC', 'B-TIME', 'E-TIME', | |
| 'S-WORK_OF_ART', 'B-PRODUCT', 'E-PRODUCT', 'B-CARDINAL', 'E-CARDINAL', 'S-MONEY', | |
| 'S-LANGUAGE', 'I-TIME', 'I-PRODUCT', 'I-GPE', 'I-QUANTITY', 'B-NORP', 'E-NORP', | |
| 'S-QUANTITY', 'B-PERCENT', 'I-PERCENT', 'E-PERCENT', 'I-CARDINAL', 'B-ORDINAL', | |
| 'I-ORDINAL', 'E-ORDINAL' | |
| ] | |
| ``` | |
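
    The label prefixes appear to follow the standard BIOES scheme (B = begin, I = inside, E = end, S = single-token entity, O = outside). As a minimal sketch (assuming the API is running locally on port 8000), consecutive B-/I-/E- tags can be merged back into entity spans:

    ```python
    import requests

    resp = requests.post(
        "http://localhost:8000/ner",
        params={"text": "Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022"},
    )
    entities, current = [], None
    for item in resp.json()["ner_results"]:
        tag = item["ner_value"]
        if tag.startswith("S-"):
            entities.append((item["token"], tag[2:]))
        elif tag.startswith("B-"):
            current = ([item["token"]], tag[2:])
        elif tag.startswith(("I-", "E-")) and current:
            current[0].append(item["token"])
            if tag.startswith("E-"):
                entities.append((" ".join(current[0]), current[1]))
                current = None
    print(entities)
    # e.g. [('αργεντινη', 'ORG'), ('παγκοσμιο κυπελλο', 'EVENT'), ('2022', 'DATE')]
    ```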
| """ | |
| try: | |
| doc = nlp_pos_ner_dp_with_g2g(text) | |
| # Create a list of dictionaries, each with "token" and "ner_value" | |
| ner_list = [ | |
| {"token": token.text, "ner_value": token.ner} for token in doc.tokens | |
| ] | |
| return {"ner_results": ner_list} | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=str(e)) | |
| # @app.post("/pos", response_model=List[POSItem], summary="Part-of-Speech Tagging") | |
| async def process_pos( | |
| text: str = Query( | |
| ..., | |
| description="The text to process for POS tagging", | |
| example="Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter.", | |
| ), | |
| ): | |
| """ | |
| The POS Tagging endpoint analyzes the input text and provides Universal POS (UPOS) tags and detailed morphological features. | |
| It returns a list of dictionaries with "token", "upos", and "morphological_features" keys. | |
| The "morphological_features" key contains a dictionary itself with detailed morphological features. | |
| The UPOS and morphological features are based on the Universal Dependencies (UD) framework: [https://universaldependencies.org/u/pos/](https://universaldependencies.org/u/pos/) | |
| Complete list of the Universal POS (UPOS) tags and morphological features: | |
| ```python | |
| {'ADJ': ['Degree', 'Number', 'Gender', 'Case'], | |
| 'ADP': ['Number', 'Gender', 'Case'], | |
| 'ADV': ['Degree', 'Abbr'], | |
| 'AUX': ['Mood', | |
| 'Aspect', | |
| 'Tense', | |
| 'Number', | |
| 'Person', | |
| 'VerbForm', | |
| 'Voice'], | |
| 'CCONJ': [], | |
| 'DET': ['Number', 'Gender', 'PronType', 'Definite', 'Case'], | |
| 'NOUN': ['Number', 'Gender', 'Abbr', 'Case'], | |
| 'NUM': ['NumType', 'Number', 'Gender', 'Case'], | |
| 'PART': [], | |
| 'PRON': ['Number', 'Gender', 'Person', 'Poss', 'PronType', 'Case'], | |
| 'PROPN': ['Number', 'Gender', 'Case'], | |
| 'PUNCT': [], | |
| 'SCONJ': [], | |
| 'SYM': [], | |
| 'VERB': ['Mood', | |
| 'Aspect', | |
| 'Tense', | |
| 'Number', | |
| 'Gender', | |
| 'Person', | |
| 'VerbForm', | |
| 'Voice', | |
| 'Case'], | |
| 'X': ['Foreign'], | |
| ``` | |
| ```python | |
| {'Abbr': ['_', 'Yes'], | |
| 'Aspect': ['Perf', '_', 'Imp'], | |
| 'Case': ['Dat', '_', 'Acc', 'Gen', 'Nom', 'Voc'], | |
| 'Definite': ['Ind', 'Def', '_'], | |
| 'Degree': ['Cmp', 'Sup', '_'], | |
| 'Foreign': ['_', 'Yes'], | |
| 'Gender': ['Fem', 'Masc', '_', 'Neut'], | |
| 'Mood': ['Ind', '_', 'Imp'], | |
| 'NumType': ['Mult', 'Card', '_', 'Ord', 'Sets'], | |
| 'Number': ['Plur', '_', 'Sing'], | |
| 'Person': ['3', '1', '_', '2'], | |
| 'Poss': ['_', 'Yes'], | |
| 'PronType': ['Ind', 'Art', '_', 'Rel', 'Dem', 'Prs', 'Ind,Rel', 'Int'], | |
| 'Tense': ['Pres', 'Past', '_'], | |
| 'VerbForm': ['Part', 'Conv', '_', 'Inf', 'Fin'], | |
| 'Voice': ['Pass', 'Act', '_'], | |
| ``` | |
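
    As a minimal sketch (again assuming a local server on port 8000), the response can be post-processed, for example to pull out all verbs together with their tense:

    ```python
    import requests

    resp = requests.post(
        "http://localhost:8000/pos",
        params={"text": "Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter."},
    )
    verbs = [
        (item["token"], item["morphological_features"].get("Tense", "_"))
        for item in resp.json()["pos_results"]
        if item["upos"] == "VERB"
    ]
    print(verbs)  # e.g. [('αρεσει', 'Pres'), ('διαβαζω', 'Pres')]
    ```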
| """ | |
| try: | |
| doc = nlp_pos_ner_dp_with_g2g(text) | |
| # Create a list of dictionaries, each with "token", "upos", and "morphological_features" | |
| pos_list = [ | |
| { | |
| "token": token.text, | |
| "upos": token.upos, | |
| "morphological_features": token.feats, | |
| } | |
| for token in doc.tokens | |
| ] | |
| # return pos_list | |
| return {"pos_results": pos_list} | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=str(e)) | |
| # @app.post("/dp", response_model=List[DPItem], summary="Dependency Parsing") | |
| async def process_dp( | |
| text: str = Query( | |
| ..., | |
| description="The text to process for Dependency Parsing", | |
| example="Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη", | |
| ), | |
| ): | |
| """ | |
| The Dependency Parsing endpoint analyzes the syntactic structure of the input text. | |
| It provides the tokens' (syntactic) heads and dependency relations. A head value of 0 indicates the root. | |
| More specifically, the endpoint returns a list of dictionaries with "token", "head", and "deprel" keys. | |
| Dependency Parsing Labels: | |
| ```python | |
| dp_possible_labels = ['obl', 'obj', 'dep', 'mark', 'case', 'flat', 'nummod', 'obl:arg', 'punct', 'cop', | |
| 'acl:relcl', 'expl', 'nsubj', 'csubj:pass', 'root', 'advmod', 'nsubj:pass', 'ccomp', | |
| 'conj', 'amod', 'xcomp', 'aux', 'appos', 'csubj', 'fixed', 'nmod', 'iobj', 'parataxis', | |
| 'orphan', 'det', 'advcl', 'vocative', 'compound', 'cc', 'discourse', 'acl', 'obl:agent'] | |
| ``` | |
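
    As a minimal sketch (assuming a local server on port 8000, and that "head" is a 1-based index into the returned token list with 0 reserved for the root, as in the example response above), each dependent can be paired with its head token:

    ```python
    import requests

    resp = requests.post(
        "http://localhost:8000/dp",
        params={"text": "Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη"},
    )
    results = resp.json()["dp_results"]
    for item in results:
        head = "ROOT" if item["head"] == 0 else results[item["head"] - 1]["token"]
        print(f'{item["token"]} --{item["deprel"]}--> {head}')
    # e.g. προτιμω --root--> ROOT, πτηση --obj--> προτιμω, αθηνα --nmod--> πτηση, ...
    ```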
| """ | |
| try: | |
| doc = nlp_pos_ner_dp_with_g2g(text) | |
| # Create a list of dictionaries, each with "token", "head", and "deprel" | |
| dp_list = [ | |
| {"token": token.text, "head": token.head, "deprel": token.deprel} | |
| for token in doc.tokens | |
| ] | |
| return {"dp_results": dp_list} | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=str(e)) | |

@app.get("/", include_in_schema=False)
async def root():
    # Redirect the bare root URL to the interactive Swagger documentation
    return RedirectResponse(url="/docs#")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app)
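
# Alternatively, the server can be launched from the command line, for example
# (assuming this file is saved as app.py; 7860 is the usual Hugging Face Spaces port):
#   uvicorn app:app --host 0.0.0.0 --port 7860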