ref: moved discovery service to api

This commit is contained in:
nearlynithin
2025-11-07 15:11:04 +05:30
parent 666461346b
commit c239d86bd8
3 changed files with 93 additions and 21 deletions

View File

@@ -8,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
import os import os
from api.routers import analyze, clean from api.routers import analyze, clean, discovery
# Create FastAPI app # Create FastAPI app
app = FastAPI( app = FastAPI(
@@ -37,6 +37,7 @@ app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")
# Include routers # Include routers
app.include_router(analyze.router, prefix="/api", tags=["AI Governance"]) app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"]) app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])
app.include_router(discovery.router, prefix="/api", tags=["Discover sources"])
@app.get("/") @app.get("/")
async def root(): async def root():

View File

@@ -1,19 +1,13 @@
from spacy.matcher import PhraseMatcher, Matcher
from fastapi import APIRouter, File, UploadFile
from fastapi.responses import FileResponse
from pathlib import Path
from collections import defaultdict
import csv, tempfile import csv, tempfile
import re import re
from pathlib import Path
from collections import Counter
from datetime import datetime
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import FileResponse
import uvicorn
import pprint
app = FastAPI()
import spacy import spacy
from spacy.matcher import PhraseMatcher, Matcher
from collections import defaultdict router = APIRouter()
import re
try: try:
nlp = spacy.load("en_core_web_trf") nlp = spacy.load("en_core_web_trf")
@@ -179,7 +173,7 @@ fieldnames = [
# this route will accept every txt/log file that is not csv # this route will accept every txt/log file that is not csv
@app.post("/api/files") @router.post("/files")
async def postFile(file: UploadFile): async def postFile(file: UploadFile):
if file.filename.endswith("csv"): if file.filename.endswith("csv"):
return {"error" : "Cannot accept csv files"} return {"error" : "Cannot accept csv files"}
@@ -226,11 +220,7 @@ async def postFile(file: UploadFile):
writer.writerow(row) writer.writerow(row)
print(pprint.pprint(row))
return FileResponse( return FileResponse(
temp_path, media_type="text/csv", filename="dataset.csv" temp_path, media_type="text/csv", filename="dataset.csv"
) )
if __name__ == '__main__':
uvicorn.run(app)

View File

@@ -20,4 +20,85 @@ python-multipart>=0.0.6
# torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121 # torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121
# Chatbot (WIP - not exposed in API yet) # Chatbot (WIP - not exposed in API yet)
gpt4all>=2.0.0 gpt4all>=2.0.0
annotated-doc==0.0.3
annotated-types==0.7.0
anyio==4.11.0
blis==1.3.0
catalogue==2.0.10
certifi==2025.10.5
cffi==2.0.0
charset-normalizer==3.4.4
click==8.3.0
cloudpathlib==0.23.0
confection==0.1.5
cryptography==44.0.3
cymem==2.0.11
download==0.3.5
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
exceptiongroup==1.3.0
fastapi==0.121.0
filelock==3.20.0
gpt4all==2.8.2
h11==0.16.0
httpcore==1.0.9
httptools==0.7.1
httpx==0.28.1
idna==3.11
Jinja2==3.1.6
joblib==1.5.2
langcodes==3.5.0
language_data==1.3.0
marisa-trie==1.3.1
markdown-it-py==4.0.0
MarkupSafe==3.0.3
mdurl==0.1.2
murmurhash==1.0.13
numpy==2.2.6
ollama==0.6.0
packaging==25.0
pandas==2.3.3
phonenumbers==9.0.18
preshed==3.0.10
presidio_analyzer==2.2.360
presidio_anonymizer==2.2.360
pycparser==2.23
pydantic==2.12.4
pydantic_core==2.41.5
Pygments==2.19.2
python-dateutil==2.9.0.post0
python-dotenv==1.2.1
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.3
regex==2025.11.3
requests==2.32.5
requests-file==3.0.1
rich==14.2.0
scikit-learn==1.7.2
scipy==1.15.3
shellingham==1.5.4
six==1.17.0
smart_open==7.4.4
sniffio==1.3.1
spacy==3.8.7
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.5.1
starlette==0.49.3
thinc==8.3.8
threadpoolctl==3.6.0
tldextract==5.3.0
tqdm==4.67.1
typer==0.20.0
typer-slim==0.20.0
typing-inspection==0.4.2
typing_extensions==4.15.0
tzdata==2025.2
urllib3==2.5.0
uvicorn==0.38.0
uvloop==0.22.1
wasabi==1.1.3
watchfiles==1.1.1
weasel==0.4.2
websockets==15.0.1
wrapt==2.0.1