Spaces — status: Runtime error
Commit: "Update app.py" (Browse files)
File changed: app.py
@@ -73,6 +73,9 @@ answer = gr.outputs.Textbox(label="Predicted answer")
 73 | examples = [["remote.jpg", "what number is the button near the top left?"]]
 74 |
 75 |
 76 | def answer_question(image, question):
 77 |     image.save('sample_img.jpg')
 78 |
@@ -111,6 +114,7 @@ def answer_question(image, question):
 111 |     tokenized_words = tokenized_words.unsqueeze(0)
 112 |     question = question.unsqueeze(0)
 113 |
 114 |     encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
 115 |
 116 |     with torch.no_grad():
After the change (added lines marked with `+`):

  73 |   examples = [["remote.jpg", "what number is the button near the top left?"]]
  74 |
  75 |
  76 | + from transformers import ViTFeatureExtractor, ViTModel
  77 | + vit_feat_extract = ViTFeatureExtractor("google/vit-base-patch16-224-in21k")
  78 | +
  79 |   def answer_question(image, question):
  80 |       image.save('sample_img.jpg')
  81 |

 114 |     tokenized_words = tokenized_words.unsqueeze(0)
 115 |     question = question.unsqueeze(0)
 116 |
 117 | +   img = vit_feat_extract(img, return_tensors = 'pt')['pixel_values']
 118 |     encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
 119 |
 120 |     with torch.no_grad():