Combining a ReactJS frontend with a Python + Flask backend

I have a ReactJS frontend that has a react-leaflet map. On click of the map, I can access the latitude and longitude. On that same click, I need a Python script to run. I have a Flask endpoint as my backend server, and my React frontend can hit this endpoint; I'm just not sure how to tie everything together and have the Python script load and work properly :(
My React code:

import { useState } from "react";
import { useMapEvents, Popup, Marker } from "react-leaflet";

const PopupInfo = () => {
  const [markers, setMarkers] = useState([]);
  const map = useMapEvents({
    async click(e) {
      const newMarker = e.latlng;
      setMarkers([...markers, newMarker]);
      console.log(e.latlng, "info");
      // access coordinates to load the python script
      const response = await fetch(
        `/coordinates?sel_lat=${e.latlng.lat}&sel_lon=${e.latlng.lng}`,
        {
          method: 'GET',
          headers: {
            Accept: 'application/json',
          },
        }
      );
      console.log(response, 'TESTING PROMISE');
      if (!response.ok) {
        throw new Error(`Error! status: ${response.status}`);
      }
      const result = await response.json(); // json() returns a promise, so await it
      console.log('result is: ', JSON.stringify(result, null, 4));
    }
  });
  return (
    <>
      {markers.map((marker, index) =>
        <Marker position={marker} key={index}>
          <Popup>Latitude: ({marker.lat})<br></br>Longitude: ({marker.lng})</Popup>
        </Marker>)}
    </>
  );
};

export default PopupInfo;
My Python code:
import argparse
import time
import pandas as pd
import datetime as dt
import json
from src.bcolors import bcolors as bc
import src.config as cfg
import src.utils as utils
import src.cfsr as cfsr
import src.gfs as gfs


def parse_args():
    parser = argparse.ArgumentParser(
        description="PURPOSE: Extract current meteorological information\n \
        for a location and give climate context",
    )
    parser.add_argument(
        "--sel_lat", type=float, dest="sel_lat", help="Latitude of requested location"
    )
    parser.add_argument(
        "--sel_lon", type=float, dest="sel_lon", help="Longitude of requested location"
    )
    args = parser.parse_args()
    ##v2
    # print("optional arg is: " + args.sel_lat, args.sel_lon)
    return args


def main():
    start = time.time()
    args = parse_args()
    print(f"{bc.HEADER}EXTRACT INFO FOR SELECTED LOCATION{bc.ENDC}")
    print(f"{bc.HEADER}({args.sel_lat},{args.sel_lon}){bc.ENDC}")
    slat = args.sel_lat
    slon = args.sel_lon
    slon360 = utils.lon_to_360(args.sel_lon)
    if cfg.debug:
        fin_ij = utils.get_ij_data(cfg.file_ref, slat, slon360)
        print(
            f"{bc.BOLD}Selected grid point: {fin_ij.lat.values}, {fin_ij.lon.values}{bc.ENDC}"
        )
print(f"Elapsed time initialization: {time.time()-start}s")
this_time = time.time()
sdoy = utils.calc_doy_noleap(cfg.today)
print(f"Elapsed time doy: {time.time()-this_time}s")
this_time = time.time()
# Get data for location
sdata_doy = cfsr.get_data_doy_loc(slat, slon360, sdoy)
sdata_all = cfsr.get_data_loc(slat, slon360)
print(f"Elapsed time load sdata: {time.time()-this_time}s")
this_time = time.time()
sqtiles = sdata_doy.sel(time=slice(f"{cfg.bsyear}", f"{cfg.beyear}")).quantile(
cfg.qtiles
)
print(f"Elapsed time qtiles: {time.time()-this_time}s")
this_time = time.time()
fcvars = gfs.get_loc_fcvars(slat, slon360)
print(f"Elapsed time fcvars: {time.time()-this_time}s")
this_time = time.time()
# Loading this year data
sdata_y = cfsr.get_data_this_year_loc(slat, slon360)
shmap_y = cfsr.get_hmap_this_year_loc(slat, slon360)
shwcs_y = cfsr.get_hwcs_this_year_loc(slat, slon360)
# Loading bounds (for max-min plots)
sbounds = cfsr.get_bounds_loc(slat, slon360)
print(f"Elapsed time load sdat_ty: {time.time()-this_time}s")
this_time = time.time()
doy_temp_ptile = (abs(sqtiles - fcvars)).idxmin(dim="quantile") * 100.0
print(f"Elapsed time doy qtile: {time.time()-this_time}s")
this_time = time.time()
print(
f"{bc.OKGREEN}Today's tmax {round(fcvars.tmax.values.item(),1)} at the selected point ({slat},{slon}) will be on the {int(doy_temp_ptile.tmax)}th percentile{bc.ENDC}"
)
print(
f"{bc.OKGREEN}Today's tmin {round(fcvars.tmin.values.item(),1)} at the selected point ({slat},{slon}) will be on the {int(doy_temp_ptile.tmin)}th percentile{bc.ENDC}"
)
print(
f"{bc.OKGREEN}Today's tmed {round(fcvars.tmed.values.item(),1)} at the selected point ({slat},{slon}) will be on the {int(doy_temp_ptile.tmed)}th percentile{bc.ENDC}"
)
#####################################################################
#####################################################################
sdata_doy.drop(["lat", "lon"]).to_dataframe().round(1).to_csv(
f"{cfg.wrk_dir}/temp_doy.csv", index=False
)
sdata_y.drop(["lat", "lon"]).to_dataframe().round(1).to_csv(
f"{cfg.wrk_dir}/temp_current_year.csv", index=True
)
shmap_y.drop(["lat", "lon"]).to_dataframe().round(1).to_csv(
f"{cfg.wrk_dir}/hmap_current_year.csv", index=True
)
shwcs_y.drop(["lat", "lon"]).to_dataframe().round(1).to_csv(
f"{cfg.wrk_dir}/hwcs_current_year.csv", index=True
)
sbounds_time = sbounds.assign_coords(
doy=pd.date_range(
dt.datetime(cfg.today.year, 1, 1),
dt.datetime(cfg.today.year, 12, 31),
freq="D",
)
)
sbounds_time.drop(["lat", "lon"]).to_dataframe().round(1).to_csv(
f"{cfg.wrk_dir}/bounds.csv", index=True
)
fcvars.to_dataframe().round(1).to_csv(f"{cfg.wrk_dir}/fcvars.csv", index=True)
print(f"Elapsed time write csv files: {time.time()-this_time}s")
this_time = time.time()
print(f"{bc.HEADER}Writing out json file with loc info{bc.ENDC}")
loc_stats = {
"tmax": round(fcvars.tmax.values.item(), 1),
"tmin": round(fcvars.tmin.values.item(), 1),
"tmed": round(fcvars.tmed.values.item(), 1),
"tmax_ptile": int(doy_temp_ptile.tmax),
"tmin_ptile": int(doy_temp_ptile.tmin),
"tmed_ptile": int(doy_temp_ptile.tmed),
"tmax_alltime_record_x": sdata_all.tmax.max().values.item(),
"tmax_alltime_record_n": sdata_all.tmax.min().values.item(),
"tmin_alltime_record_x": sdata_all.tmin.max().values.item(),
"tmin_alltime_record_n": sdata_all.tmin.min().values.item(),
"tmed_alltime_record_x": sdata_all.tmed.max().values.item(),
"tmed_alltime_record_n": sdata_all.tmed.min().values.item(),
}
with open(f"{cfg.wrk_dir}/loc_stats.json", "w", encoding="utf-8") as f:
f.write(json.dumps(loc_stats, indent=2))
print(f"Elapsed time write json file: {time.time()-this_time}s")
this_time = time.time()
print(f"{bc.OKGREEN}Elapsed time TOTAL: {time.time()-start}s{bc.ENDC}")
###############################################################################
# __main__ scope
###############################################################################
if __name__ == "__main__":
raise SystemExit(main())

You should expose the Python code as an API and then call that API from React.
The call can be a POST, GET, or PATCH request.
For the Python API you can use Flask, FastAPI, Django, or another framework.
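For this particular setup, a minimal sketch of the Flask side could look like the following. It matches the GET /coordinates?sel_lat=...&sel_lon=... request the React code already sends; the script filename extract_info.py and the work/ output directory are assumptions (the script above writes loc_stats.json into cfg.wrk_dir), so adjust the paths to your project.

# app.py, a minimal sketch; the file names and paths below are assumptions
import json
import subprocess
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/coordinates", methods=["GET"])
def coordinates():
    sel_lat = request.args.get("sel_lat", type=float)
    sel_lon = request.args.get("sel_lon", type=float)
    if sel_lat is None or sel_lon is None:
        return jsonify({"error": "sel_lat and sel_lon are required"}), 400
    # Run the existing script exactly as it runs from the command line.
    # Alternatively, refactor main() to accept (sel_lat, sel_lon) and import it,
    # which avoids starting a new interpreter on every map click.
    subprocess.run(
        ["python", "extract_info.py", "--sel_lat", str(sel_lat), "--sel_lon", str(sel_lon)],
        check=True,
    )
    # The script writes loc_stats.json into cfg.wrk_dir; read it back and return it.
    with open("work/loc_stats.json", encoding="utf-8") as f:
        return jsonify(json.load(f))

if __name__ == "__main__":
    app.run(port=5000, debug=True)

During development the React dev server also needs to reach Flask, for example via the proxy field in package.json (Create React App) or by enabling CORS on the Flask app.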

Related

Is there a way to send a message to the front end from a websocket client in a Django Channels consumer?

I have created a consumer which I call from JavaScript.
In this consumer I create a websocket client (WebSocketApp) and a thread in which it runs forever.
In that client I implemented an __on_message event from which I want to send a message back to JavaScript with self.send.
But there is no event.data in my JavaScript code when I send data with self.send in my consumer.
The code below is from the class PriceConsumer(AsyncWebsocketConsumer).
Python code:
`
async def receive(self, text_data):
    for t in threading.enumerate():
        if "websocket" in t.name:
            t.join()
    markets = json.loads(text_data)
    if "prices" not in markets:
        prices = {'bnbusdt', 'ethusdt', 'btcusdt'}
    else:
        prices = markets['prices']
    broker = markets['broker']
    if broker == "Binance":
        url = uri + "stream?streams="
        ticker = '#ticker/'.join(prices)
        ticker += '#ticker'
        url = url + ticker
        socket_thread = threading.Thread(
            target=asyncio.run,
            args=(self.talk_to_worker(
                url,
                text_data,
                False
            ),)
        )
        socket_thread.name = "websocket-" + url
        socket_thread.start()
    elif broker == "IB":
        usernameib = None
        accountnumberib = None
        passwordib = None
        conIds = markets['conIds']
        ib_client = IBClient(
            username=usernameib,
            password=passwordib,
            account=accountnumberib,
            is_server_running=True
        )  # grab the account data.
        accountTickle = ib_client.tickle()
        url = "wss://localhost:5000/v1/api/ws"
        Dictionnary = {"broker": broker, "tickle": accountTickle['session'], "conIds": conIds}
        text_data = json.dumps(Dictionnary)
        time.sleep(1)
        self.stop = False
        socket_thread = threading.Thread(
            target=asyncio.run,
            args=(self.talk_to_worker(
                url,
                text_data,
                stop=False
            ),)
        )
        socket_thread.name = "websocket-" + url
        socket_thread.start()
    elif broker == "Bybit":
        markets = json.loads(text_data)
        if "prices" not in markets:
            prices = {'ETHUSD', 'BTCUSD'}
        else:
            prices = markets['prices']
        ws_url = "wss://stream.bybit.com/realtime_public"
        api_key = markets['apiKey']
        api_secret = markets['apiSecret']
        # Generate expires.
        expires = int((time.time() + 1) * 1000)
        # Generate signature.
        signature = str(hmac.new(
            bytes(api_secret, "utf-8"),
            bytes(f"GET/realtime{expires}", "utf-8"), digestmod="sha256"
        ).hexdigest())
        param = "api_key={api_key}&expires={expires}&signature={signature}".format(
            api_key=api_key,
            expires=expires,
            signature=signature
        )
        url = ws_url + "?" + param
        try:
            self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            self.worker_ws = websocket.WebSocketApp(url,
                on_message=self.__on_message,
                on_close=self.__on_close,
                on_open=self.__on_open,
                on_error=self.__on_error,
                keep_running=True)
            self.wst = threading.Thread(target=lambda: self.worker_ws.run_forever())
            self.wst.daemon = True
            self.wst.start()
            logging.debug("Started thread")
            # Wait for connect before continuing
            retry_times = 5
            while not self.worker_ws.sock or not self.worker_ws.sock.connected and retry_times >= 0:
                time.sleep(1)
                retry_times -= 1
            if retry_times == 0 and not self.worker_ws.sock.connected:
                logging.error("Couldn't connect to WebSocket! Exiting.")
                self.exit()
                raise websocket.WebSocketTimeoutException('Error! Could not connect to WebSocket!.')
            if api_key and api_secret:
                self.__do_auth(api_key, api_secret=api_secret)
            result = self.worker_ws.send('{"op":"ping"}')
            self.send(result)
            tosend = "{\"op\": \"subscribe\", \"args\": ["
            for index, sym in enumerate(prices):
                if index == (len(prices) - 1):
                    tosend += "\"instrument_info.100ms." + str(sym) + "\"]}"
                else:
                    tosend += "\"instrument_info.100ms." + str(sym) + "\","
            self.worker_ws.send(tosend)
            while True:
                time.sleep(20)
                result = self.worker_ws.send('{"op":"ping"}')
                self.send(result)
        except BaseException as err:
            print(f'Unexpected exception {err=}, {type(err)=}')

def generate_signature(self, expires, api_secret):
    """Generate a request signature."""
    _val = 'GET/realtime' + expires
    return str(hmac.new(bytes(api_secret, "utf-8"), bytes(_val, "utf-8"), digestmod="sha256").hexdigest())

def __do_auth(self, api_key, api_secret):
    expires = str(int(round(time.time()) + 1)) + "000"
    signature = self.generate_signature(expires, api_secret)
    auth = {}
    auth["op"] = "auth"
    auth["args"] = [api_key, expires, signature]
    args = json.dumps(auth)
    self.worker_ws.send(args)

def __on_message(self, message, second):
    '''Handler for parsing WS messages.'''
    asyncio.sleep(0.5)
    time.sleep(0.5)
    self.send(text_data=str(json.dumps(second)))

def __on_error(self, error, second):
    '''Called on fatal websocket errors. We exit on these.'''
    print(f"Error : {error=} , {second=}")
    logging.error(f"Error : {error=} , {second=}")
    # raise websocket.WebSocketException(error)

def __on_open(self, one):
    '''Called when the WS opens.'''
    logging.debug(f"Websocket Opened. {one=}")

def __on_close(self, one, two):
    '''Called on websocket close.'''
    logging.info(f'Websocket Closed {one=}, {two=}')

def ping(self, ws):
    self.worker_ws.send('{"op":"ping"}')
    if 'pong' not in self.data:
        self.data['pong'] = []
`
JavaScript code:
`
class websocketClientBybit {
    constructor() {
        this.url = 'ws://localhost:8000/price/';
    }
    updateTicker(cryptolist, options, apiKey, apiSecret) {
        this.connection = new WebSocket(this.url);
        this.connection.addEventListener("open", () => {
            this.connection.send(JSON.stringify(
                {
                    "broker": "Bybit",
                    "apiKey": apiKey,
                    "apiSecret": apiSecret,
                    "prices": cryptolist
                }
            ))
            console.log(`[websockets] Connected to ${this.url}`);
        });
        this.connection.addEventListener("close", () => {
            console.log(`[websockets] Disconnected from ${this.url}`);
            this.connection.close(1000, "Work complete");
        });
        this.connection.addEventListener("unload", function () {
            if (this.connection.readyState == WebSocket.OPEN)
                this.connection.close(1000, "Work complete");
        });
        this.connection.addEventListener("message", (event) => {
            if (event?.data) {
                var mess = event.data
                console.log(mess)
            }
        });
    }
}
export default websocketClientBybit;
`
I tried to create a thread that calls a worker itself:
`
socket_thread = threading.Thread(
target=asyncio.run,
args=(self.talk_to_worker(
url,
text_data,
stop=False
),)
)
`
with an async websocket connect:
self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
async with websockets.connect(url, ssl=self.ssl_context, ping_interval=None) as worker_ws:
I tried to encapsulate the Bybit websocket with this method, but the connection to the Bybit websocket "wss://stream.bybit.com/realtime_public" is rejected or fails with an SSL error. Therefore I don't think that approach, with create_connection or connect, is workable.
How can I successfully pass my data from the websocket consumer back to JavaScript from the __on_message function with self.send?
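Not from the original thread, but a common way to bridge a worker thread back to an AsyncWebsocketConsumer is to remember the consumer's event loop and schedule self.send() onto it with asyncio.run_coroutine_threadsafe, because self.send is a coroutine and cannot simply be called from another thread. A minimal sketch under that assumption (the handler signature here follows websocket-client's (ws, message) convention, not the question's):

import asyncio
import json
from channels.generic.websocket import AsyncWebsocketConsumer

class PriceConsumer(AsyncWebsocketConsumer):
    async def receive(self, text_data):
        # Remember the loop the consumer runs on before starting the worker thread.
        self.loop = asyncio.get_running_loop()
        # ... start the websocket worker thread as in the question ...

    def __on_message(self, ws, message):
        # Called from the worker thread: schedule the coroutine on the consumer's
        # loop instead of calling self.send() directly.
        asyncio.run_coroutine_threadsafe(
            self.send(text_data=json.dumps({"msg": message})),
            self.loop,
        )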

Load CSV file into static Python Bokeh Web app with JavaScript Papa Parse

I have a static Bokeh web app for local use, and I want to be able to load a file using JavaScript without running Python. The idea is to share the Bokeh output HTML file with non-Python users so they can open it and load their own data with a file selector for interactive visualization. I made some very rough code based on this post and this post.
I have no knowledge of JS and I apologize in advance for the bad implementation. Please feel free to suggest a similar example or a simpler approach to read a file without the Bokeh server.
from bokeh.models.widgets import Toggle
from bokeh.plotting import figure, output_file, show

output_file("load_data_buttons.html")

x = [0]
y = x
source = ColumnDataSource(data=dict(x=x, y=y))
plot = figure(plot_width=400, plot_height=400)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

callback = CustomJS(args=dict(source=source), code="""
    // initialize our parsed_csv to be used wherever we want
    var parsed_csv;
    var start_time, end_time;
    // document.ready
    $(function() {
        $('.load-file').on('click', function(e) {
            start_time = performance.now();
            $('#report').text('Processing...');
            console.log('initialize worker');
            var worker = new Worker('worker.js');
            worker.addEventListener('message', function(ev) {
                console.log('received raw CSV, now parsing...');
                // Parse our CSV raw text
                Papa.parse(ev.data, {
                    header: true,
                    dynamicTyping: true,
                    complete: function (results) {
                        // Save result in a globally accessible var
                        parsed_csv = results;
                        console.log('parsed CSV!');
                        console.log(parsed_csv);
                        $('#report').text(parsed_csv.data.length + ' rows processed');
                        end_time = performance.now();
                        console.log('Took ' + (end_time - start_time) + " milliseconds to load and process the CSV file.")
                    }
                });
                // Terminate our worker
                worker.terminate();
            }, false);
            // Submit our file to load
            var file_to_load = document.getElementById("myFile").files[0];
            console.log('call our worker');
            worker.postMessage({file: file_to_load});
        });
    });
    x = parsed_csv.data['x']
    y = parsed_csv.data['y']
    #load data stored in the file name and assign to x and y
    source.trigger('change');
""")
toggle1 = Toggle(label="Load data file 1", callback=callback)
layout = Row(toggle1, plot)
show(layout)
worker.js
self.addEventListener('message', function(e) {
    console.log('worker is running');
    var file = e.data.file;
    var reader = new FileReader();
    reader.onload = function (fileLoadedEvent) {
        console.log('file loaded, posting back from worker');
        var textFromFileLoaded = fileLoadedEvent.target.result;
        // Post our text file back from the worker
        self.postMessage(textFromFileLoaded);
    };
    // Actually load the text file
    reader.readAsText(file, "UTF-8");
}, false);
The csv file has x,y data
x y
0 0
1 1
2 2
3 3
4 4
You don't need Web Workers to achieve it, especially if you're not comfortable with JavaScript.
Here's how I would do it:
from bokeh.layouts import row, column
from bokeh.models import Div, ColumnDataSource, CustomJS, FileInput
from bokeh.plotting import figure, save

source = ColumnDataSource(data=dict(x=[0], y=[0]))

plot = figure(plot_width=400, plot_height=400)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

fi_label = Div(text='Load data file 1')
fi = FileInput()
status = Div()

callback = CustomJS(args=dict(source=source,
                              status=status),
                    code="""
    status.text = 'Loading...';
    Papa.parse(atob(cb_obj.value), {
        // For some reason, Papa didn't detect it automatically.
        delimiter: '\t',
        header: true,
        dynamicTyping: true,
        complete: function (results) {
            const acc = results.meta.fields.reduce((acc, f) => {
                acc[f] = [];
                return acc;
            }, {});
            source.data = results.data.reduce((acc, row) => {
                for (const k in acc) {
                    acc[k].push(row[k]);
                }
                return acc;
            }, acc);
            status.text = 'Ready!';
        }
    });
""")
fi.js_on_change('value', callback)

template = """\
{% block preamble %}
<script src="https://cdnjs.cloudflare.com/ajax/libs/PapaParse/5.1.0/papaparse.min.js"
        integrity="sha256-Fh801SO9gqegfUdkDxyzXzIUPWzO/Vatqj8uN+5xcL4="
        crossorigin="anonymous"></script>
{% endblock %}
"""

# Cannot use `show` because it doesn't have the `template` argument.
save(column(row(fi_label, fi), plot), template=template)

Node.js child_process spawn runs the whole Python code every time I call it

I have this code which connects Node.js to a Python script. The script contains ML models with a TensorFlow backend and so on; it basically gives a string output. I send an image URL from Node.js via child_process.spawn to Python, and it gives back the recognised expression as a string. Basically I am doing facial recognition, coded in Python but called through Node.js, and I send the string in the response as JSON data (REST API).
The problem I am facing is that whenever I call spawn, it runs the whole Python script, which takes very long because the script has to load all the modules from the top before finally giving output.
Here is the Python code:
from gtts import gTTS
language = 'en'
#myobj = gTTS(text='Do you know the person? Yes or No', lang=language, slow=True)
#myobj.save("question1.mp3")
#myobj = gTTS(text='What is his or her name', lang=language, slow=True)
#myobj.save("question2.mp3")
import csv
import pandas as pd
import numpy as np
#with open('database.csv','w') as f:
#    writer=csv.writer(f)
#    writer.writerow(['Chinmay',embedded])
face_embeddings=np.array(pd.read_csv('database.csv',header=None))
face_names=np.array(pd.read_csv('database_names.csv',header=None))
from cv2 import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from align import AlignDlib
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
from model import create_model
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import matplotlib.pyplot as plt
from keras.models import load_model
from keras.preprocessing.image import load_img, img_to_array
from util.model import CNNModel, generate_caption_beam_search
import os
from config import config
from pickle import load
import sys

cut_size = 44
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])
class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
final_text=''
nn4_small2_pretrained = create_model()
nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')

def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

def load_image(path):
    img = cv2.imread(path, 1)
    # OpenCV loads images with color channels
    # in BGR order. So we need to reverse them
    return img[...,::-1]

def extract_features(filename, model, model_type):
    if model_type == 'inceptionv3':
        from keras.applications.inception_v3 import preprocess_input
        target_size = (299, 299)
    elif model_type == 'vgg16':
        from keras.applications.vgg16 import preprocess_input
        target_size = (224, 224)
    # Loading and resizing image
    image = load_img(filename, target_size=target_size)
    # Convert the image pixels to a numpy array
    image = img_to_array(image)
    # Reshape data for the model
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # Prepare the image for the CNN Model model
    image = preprocess_input(image)
    # Pass image into model to get encoded features
    features = model.predict(image, verbose=0)
    return features

def getrecogstr( imgurl ):
    # Path of Image
    #image_file=imgurl
    image_file = sys.argv[1]
    # Initialize the OpenFace face alignment utility
    alignment = AlignDlib('models/landmarks.dat')
    # Load an image
    jc_orig = load_image(image_file)
    # Detect face and return bounding box -
    bb = alignment.getAllFaceBoundingBoxes(jc_orig)
    net = VGG('VGG19')
    checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'),map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    # Load the tokenizer
    tokenizer_path = config['tokenizer_path']
    tokenizer = load(open(tokenizer_path, 'rb'))
    # Max sequence length (from training)
    max_length = config['max_length']
    caption_model = load_model('model.hdf5')
    image_model = CNNModel(config['model_type'])
    for i in bb:
        # Transform image using specified face landmark indices and crop image to 96x96
        jc_aligned = alignment.align(96, jc_orig, i, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
        location=(i.height()+i.width())/(jc_orig.shape[0]+jc_orig.shape[1])
        # Finding the emotion of cropped image
        gray = rgb2gray(jc_aligned)
        gray = resize(gray, (48,48), mode='symmetric').astype(np.uint8)
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
        #net.cuda()
        net.eval()
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        #inputs = inputs.cuda()
        inputs = Variable(inputs, volatile=True)
        outputs = net(inputs)
        outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops
        score = F.softmax(outputs_avg)
        _, predicted = torch.max(outputs_avg.data, 0)
        # Find the name of the person in the image
        jc_aligned = (jc_aligned / 255.).astype(np.float32)
        embeddings = nn4_small2_pretrained.predict(np.expand_dims(jc_aligned, axis=0))[0]
        print("##")
        print(embeddings)
        matched_embeddings=1000
        for j in range(len(face_embeddings)):
            temp=np.sum(np.square(embeddings-face_embeddings[j]))
            if (temp<=0.56 and temp <matched_embeddings):
                matched_embeddings=np.sum(np.square(embeddings-face_embeddings[j]))
                face_index=j
            print(temp)
            print('above')
        if matched_embeddings!=1000:
            face_name=face_names[face_index][0]
            print("##known")
        else:
            face_name='Unknown'
            print("##unknown")
            #print("Unknown Person detected. Do you know this person yes or no ?")
            #Play welcome1.mp3
            #Play welcome2.mp3 if input is yes
        final_text+= face_name+' expression is '+class_names[int(predicted.cpu().numpy())] + "."
        print("##"+final_text)
    sys.stdout.flush()

getrecogstr()
Here is the Node code:
const express = require('express');
const app = express();
const bodyParser = require('body-parser');
const port = 1000;
const spawn = require("child_process").spawn;

app.use(bodyParser.json()); // application/json
app.use((req, res, next) => {
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET, POST, PUT, PATCH, DELETE');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    next();
});

app.get('/test', (req, res, next) => {
    const imgurl = req.query.imgurl;
    var process = spawn('python', ["./final.py",
        imgurl,
    ]);
    process.stdout.on('data', function (data) {
        const recog_str = data.toString().split('##')[3];
        console.log(recog_str);
        res.json(recog_str)
    })
})

server.listen(port, () => {
    console.log("Ok");
})
I just want to skip loading the modules every time. I know the modules have to be loaded into memory, but it is taking too long. Can the Python script keep running all the time, so that I can send arguments to it from Node.js in the middle of that run and call a function which returns the string?
You could use a global variable and message-based communication between Node and the spawned Python process.
I got the idea from this tutorial, which is about message queues, but the same method can be applied here.
app.js
const app = require('express')();
const uuid = require('uuid');
const spawn = require("child_process").spawn;

var py = spawn('python', ["./face.py"]);
var globalobj = {}

//whenever any data arrives, it will be stored in globalobj.
py.stdout.on('data', function (data) {
    try {
        const { id, msg } = JSON.parse(data.toString());
        globalobj[id] = msg;
    } catch (err) {
        //If data chunk received is incomplete(child process sent large output) json parse fails.
    }
});

const delay = () => new Promise(resolve => {
    setTimeout(() => {
        resolve();
    }, 4000);
});

app.get('/test', async (req, res, next) => {
    const url = req.query.imgurl;
    const id = uuid.v4();
    py.stdin.write(JSON.stringify({ id, url }) + "\n");
    await delay();
    //If no response has arrived from the child process, globalobj wont have id key.
    if (globalobj[id] != undefined) {
        res.send(globalobj[id]);
        delete globalobj[id];
    } else {
        res.status(500).send('No response from child process');
    }
});

app.listen(3000, 'localhost', () => {
    console.log(`server started on port 3000`);
});
The downside is that messages which get a response after the delay will accumulate in the global object. Also, py.stdout.on('data', function(data){}) returns the data as a stream, so if the message is larger it will be split into chunks by Node.js. See this post.
The reason for appending \n when writing to the child's stdin can be found here.
main.py
import sys, json

while True:
    stdin = sys.stdin.readline().replace("\n", "")
    if stdin:
        data = json.loads(stdin)
        #do your computation here
        print(json.dumps({'id': data['id'], 'msg': 'your message'}), flush=True)
        stdin = None
When I quickly tested, it worked, but it may not work in all cases. Test this method well before using it.

Splash (+scrapy) does not render web page correctly

I'm using Scrapy + Splash, and I have problems downloading this page: http://new.abb.com/jobs/it/center#JobCountry=IT&JobCity=any&JobFunction=any&JobRole=any&JobText=
It seems that Splash cannot execute the javascript correctly.
Here is a stripped-down, working, self-contained version of my program (sorry if it is not stripped down as much as it could be):
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from scrapy.http import HtmlResponse
import sys
import io
import os
import base64
def saveFile(ss, fileNameExt, folderName):
    f = open(folderName + '/' + fileNameExt, 'w')
    f.write(ss)
    f.close()
    return fileNameExt

def savePng(png_bytes, fileNameExt, folderName):
    f = open(folderName + '/' + fileNameExt, 'wb')
    f.write(png_bytes)
    f.close()
    return fileNameExt

def savePageOriginalInFolder(response, folderName, chiave='pag1'):
    fileName = "site.html"
    testo = response.data[chiave].decode('utf8')
    return saveFile(testo, fileName, folderName)

def savePagePng(response, folderName, pngDataName):
    fileName = 'site.png'
    if hasattr(response, 'data'):
        png_bytes = base64.b64decode(response.data[pngDataName])
    return savePng(png_bytes, fileName, folderName)
class GenericoSpider(scrapy.Spider):
    name = 'provaAbb'

    def asSplashRequest(self, url, callback, id_elenco="no_id", id_sessione="no_id_sessione"):
        return SplashRequest(
            url=url,
            endpoint='execute',
            args={'lua_source': self.script, 'id_elenco': id_elenco, 'id_sessione': id_sessione},
            callback=callback,
        )

    outDir = name  # take the output folder name from the spider name
    db_name = ""

    def start_requests(self):
        sito = 'http://new.abb.com/jobs/it/center#JobCountry=IT&JobCity=any&JobFunction=any&JobRole=any&JobText='
        yield self.asSplashRequest(sito, self.parse_list, 'id_mio_elenco')

    script = """
    function main(splash)
        local url = splash.args.url
        splash:set_viewport_size(1280, 2500)
        splash:init_cookies(splash.args.cookies)
        assert(splash:go(url))
        assert(splash:wait(10))
        return {
            url = splash:url(),
            pag1 = splash:html(),
            png1 = splash:png(),
            id_elenco = splash.args.id_elenco,
            id_sessione = splash.args.id_sessione,
            cookies = splash:get_cookies(),
            tt = splash.args
        }
    end
    """

    def parse_list(self, response):
        for ss in response.data:
            if len(ss) >= 4:
                if ss[0:3] == 'pag':
                    fileName = savePageOriginalInFolder(response, self.outDir, ss)
                elif ss[0:3] == 'png':
                    fileName = savePagePng(response, self.outDir, ss)
Part of the settings.py:
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
}
SPIDER_MIDDLEWARES = {
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
}
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
Result: the spinner is still shown in the list area and the page numbers are not loaded. (Increasing the wait time in the Lua script did not solve the problem.)

Internal server error 500 in python

I have checked all the CGI-related things, but I am still getting this error.
I tried to run another sample program which gets a response from a .py file, and that works fine, but this code alone gives:
GET http://localhost/testapp/demos/classifier_demo.py?query= 500 (Internal Server Error)x.ajaxTransport.n.send # jquery-1.10.2.min.js:6x.extend.ajax # jquery-1.10.2.min.js:6call_fun # demo.html:16
jquery-1.10.2.min.js:6 GET http://localhost/testapp/demos/classifier_demo.py?query=EASTERN. 500 (Internal Server Error)x.ajaxTransport.n.send # jquery-1.10.2.min.js:6x.extend.ajax # jquery-1.10.2.min.js:6call_fun # demo.html:16
P.S. I have already set the permissions to 777.
I am calling the Python script using jQuery every 5 seconds.
<script>
$( document ).ready(function() {
    function call_fun() {
        var text = $('textarea#trans').val();
        //alert(text)
        var data = {"query" : text};
        //alert(data);
        $.ajax({
            url: "classifier_demo.py",
            type: "POST",
            data: data,
            success: function(response) {
                console.log(response)
            }
        })
    }
    setInterval(call_fun, 5000);
});
</script>
Here is the Python code:
#!/usr/bin/python
"""
Using the words as features removing stopwords
"""
from sklearn.utils import check_random_state
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, average_precision_score, f1_score, precision_score, recall_score
from sklearn.externals import joblib
from sklearn.feature_extraction.text import FeatureHasher
from nltk.corpus import stopwords
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import word_tokenize
import nltk
import numpy as np
from time import time
import pprint, pickle
import gearman
import nltk, json, cgi
import re
import os
import sys

"""
This class will train and test the data and will give polarity for various emotions
"""
class SentimentAnalyzer(object):
    """
    Init for SentimentAnalyzer
    """
    def __init__(self):
        self.root_dir = os.getcwd()
        self.trainClassifier()

    """
    Function to fetch the data from cache
    #cache <dict> consist of training data
    """
    def fetch_data(self, cache, data_home=None, subset='train', categories=None,
                   shuffle=True, random_state=42):
        if subset in ('train', 'test'):
            data = cache[subset]
        else:
            raise ValueError(
                "subset can only be 'train', 'test' or 'all', got '%s'" % subset)
        if shuffle:
            random_state = check_random_state(random_state)
            indices = np.arange(data.target.shape[0])
            random_state.shuffle(indices)
            data.filenames = data.filenames[indices]
            data.target = data.target[indices]
            # Use an object array to shuffle: avoids memory copy
            data_lst = np.array(data.data, dtype=object)
            data_lst = data_lst[indices]
            data.data = data_lst.tolist()
        return data

    """
    For custom tokenizing the text, removed stop words from text
    #text <type 'str'> text which needs to get tokenized
    #return <type 'str'> tokens
    """
    def token_ques(self, text):
        things_to_replace = ['?']
        #wh_tags = ['WP','WRB','MD','WDT']
        things_to_replace += stopwords.words('english')
        #wh_word = None
        for tok in text.split('\n'):
            original_query = tok
            query_pos_tags = nltk.pos_tag(word_tokenize(tok))
            for word in things_to_replace:
                tok = tok.lower()
                tok = re.sub("\s"+word+"\s|\s?"+"\?"+"$",' ',tok)
                tok = tok.strip(" ")
                tok = tok.lstrip(" ")
                tok = tok.rstrip(" ")
            for word in word_tokenize(tok):
                yield word.lower()

    """
    Train classifier
    """
    def trainClassifier(self):
        try:
            t1 = time()
            start_time = time()
            self.hasher = FeatureHasher(input_type='string',non_negative=True)
            self.clf = MultinomialNB(alpha=0.001)
            self.hasher = FeatureHasher(input_type='string',non_negative=True)
            self.clf = MultinomialNB(alpha=0.001)
            data_folder = self.root_dir + "/emotions"
            train_dataset = load_files(data_folder)
            print("Time taken to load the data=>", time()-start_time)
            print("data loaded")
            cache = dict(train=train_dataset)
            self.data_train = self.fetch_data(cache, subset='train')
            try:
                X_train = pickle.load(open("x_result.pickle", "rb" ) )
                y_train = pickle.load(open("y_result.pickle", "rb" ) )
                self.clf.fit(X_train, y_train)
            except:
                print "Updating the classifier"
                training_data = []
                for text in self.data_train.data:
                    #text = self.modifyQuery(text.decode('utf-8','ignore'))
                    text = text.decode('utf-8','ignore')
                    training_data.append(text)
                raw_X = (self.token_ques(text) for text in training_data) #Type of raw_X <type 'generator'>
                #X_train = self.vectorizer.fit_transform(raw_X)
                X_train = self.hasher.transform(raw_X)
                y_train = self.data_train.target
                readx = open('x_result.pickle', 'wb')
                pickle.dump(X_train, readx)
                readx.close()
                readY = open('y_result.pickle', 'wb')
                pickle.dump(y_train, readY)
                readY.close()
                self.clf.fit(X_train, y_train)
            print("Classifier tained ...")
            print("time taken=>", time()-t1)
        except Exception:
            import traceback
            print traceback.format_exc()

    """
    Function to test classifier
    """
    def testClassifier(self, query):
        try:
            result = {}
            #To replace NE
            #query = self.modifyQuery(query)
            test_data = [query]
            raw_X = (self.token_ques(text) for text in test_data)
            X_test = self.hasher.transform(raw_X)
            #X_test = self.vectorizer.fit_transform(raw_X)
            pred = self.clf.predict(X_test)
            print("pred=>", pred)
            self.categories = self.data_train.target_names
            for doc, category in zip(test_data, pred):
                print('%r => %s' % (doc, self.categories[category]))
            index = 1
            predict_prob = self.clf.predict_proba(X_test)
            final_result = []
            for doc, category_list in zip(test_data, predict_prob):
                # print('\n\n')
                category_list = sorted(enumerate(category_list), key=lambda x:x[1], reverse=True)
                i = 0
                for val in category_list:
                    if float(val[1]) > float(0.05):
                        # print('%r => %s => %s' % (doc, self.categories[val[0]], str(val[1])))
                        result = {}
                        result[self.categories[val[0]]] = "%0.2f"%(float(val[1]) * 100)+"%"
                        final_result.append(result)
                    index += 1
        except Exception:
            import traceback
            print traceback.format_exc()
        import json
        # print result
        # print final_result
        return final_result

if __name__ == '__main__':
    fs = cgi.FieldStorage()
    text = fs['query'].value
    #query = fs.getvalue(query)
    #query = raw_input("Please enter the text to process:")
    query = "Love you man"
    result = { "result" : text}
    #result = SentimentAnalyzer().testClassifier(query)
    json_result = json.dumps( result )
    print json_result
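Not part of the original post, but one classic cause of a 500 from a CGI script is a missing response header block: a CGI program must print a Content-Type line followed by a blank line before any body, otherwise the web server reports an internal server error. A minimal sketch of the __main__ block with the header added (everything else as in the question):

if __name__ == '__main__':
    fs = cgi.FieldStorage()
    text = fs.getvalue('query', '')
    # CGI requires a header block terminated by a blank line before the body;
    # without it the server answers "500 Internal Server Error".
    print("Content-Type: application/json")
    print("")
    print(json.dumps({"result": text}))

Running the script directly from the command line (python classifier_demo.py) is also a quick way to surface import errors (for example a missing gearman module or NLTK data), which can show up as the same 500 response when the script runs under CGI.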
