Spaces — status: Runtime error
Commit: "Update app.py" (Browse files)
File changed: app.py
@@ -73,6 +73,9 @@ answer = gr.outputs.Textbox(label="Predicted answer")
 73 | examples = [["remote.jpg", "what number is the button near the top left?"]]
 74 |
 75 |
 76 | def answer_question(image, question):
 77 |     image.save('sample_img.jpg')
 78 |
@@ -111,6 +114,7 @@ def answer_question(image, question):
 111 |     tokenized_words = tokenized_words.unsqueeze(0)
 112 |     question = question.unsqueeze(0)
 113 |
 114 |     encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
 115 |
 116 |     with torch.no_grad():
After the change (added lines marked with `+`):

  73 |   examples = [["remote.jpg", "what number is the button near the top left?"]]
  74 |
  75 |
  76 | + from transformers import ViTFeatureExtractor, ViTModel
  77 | + vit_feat_extract = ViTFeatureExtractor("google/vit-base-patch16-224-in21k")
  78 | +
  79 |   def answer_question(image, question):
  80 |       image.save('sample_img.jpg')
  81 |

 114 |     tokenized_words = tokenized_words.unsqueeze(0)
 115 |     question = question.unsqueeze(0)
 116 |
 117 | +   img = vit_feat_extract(img, return_tensors = 'pt')['pixel_values']
 118 |     encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
 119 |
 120 |     with torch.no_grad():