saadfarhad committed on
Commit 95d4486 · verified · 1 Parent(s): b21f7ff

Update app.py

Files changed (1)
  1. app.py +15 -7
app.py CHANGED
@@ -1,35 +1,43 @@
 import gradio as gr
 import torch
-from transformers import AutoConfig, AutoProcessor, AutoModelForCausalLM
+from transformers import AutoConfig, AutoProcessor
+# Import the custom model class directly.
+from transformers.models.llava.modeling_llava import LlavaQwenForCausalLM

-# === Diagnostic Code Start ===
-# Load the configuration with remote code enabled
-config = AutoConfig.from_pretrained("lmms-lab/LLaVA-Video-7B-Qwen2", trust_remote_code=True)
+# --- Diagnostic Print (Optional) ---
+config = AutoConfig.from_pretrained(
+    "lmms-lab/LLaVA-Video-7B-Qwen2",
+    trust_remote_code=True
+)
 print("Configuration type:", type(config))
 print("Configuration architectures:", config.architectures)
-# === Diagnostic Code End ===
+# --- End Diagnostic ---

-# Load processor and model with remote code enabled.
+# Load the processor and the model using the custom model class.
 processor = AutoProcessor.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
-model = AutoModelForCausalLM.from_pretrained(
+model = LlavaQwenForCausalLM.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )

+# Move model to the appropriate device.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)

 def analyze_video(video_path):
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
+    # Process the text and video.
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
     inputs = {k: v.to(device) for k, v in inputs.items()}
+    # Generate output (assumes the custom model has a generate method).
     outputs = model.generate(**inputs, max_new_tokens=100)
     answer = processor.decode(outputs[0], skip_special_tokens=True)
     return answer

+# Create the Gradio Interface.
 iface = gr.Interface(
     fn=analyze_video,
     inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),
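
Worth noting for anyone reproducing this Space: depending on the installed transformers version, LlavaQwenForCausalLM may not be importable from transformers.models.llava.modeling_llava (the class originates in the LLaVA-NeXT codebase rather than core transformers), so the new import can raise ImportError. Below is a minimal, defensive sketch that resolves the class from the checkpoint's own config instead of hard-coding the path; the llava.model.language_model.llava_qwen fallback path is an assumption about an installed LLaVA package, not a core transformers module.

    import importlib

    from transformers import AutoConfig, AutoModelForCausalLM

    MODEL_ID = "lmms-lab/LLaVA-Video-7B-Qwen2"
    config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)

    try:
        # Preferred: let transformers dispatch to the architecture named in
        # config.architectures, using the checkpoint's remote code if it ships any.
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
    except (ValueError, ImportError):
        # Fallback: import the class named in the config from an installed package
        # that provides it. This module path is an assumption (LLaVA-NeXT codebase),
        # not a core transformers module.
        module = importlib.import_module("llava.model.language_model.llava_qwen")
        model_cls = getattr(module, config.architectures[0])
        model = model_cls.from_pretrained(MODEL_ID)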
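On resources: from_pretrained with no dtype argument materializes the full 7B checkpoint in float32 before model.to(device) moves it, which is tight on a typical Space GPU. A common alternative (a sketch, not what this commit does) is to request half precision and automatic placement at load time; both keyword arguments are standard from_pretrained options, and device_map="auto" additionally requires the accelerate package.

    model = LlavaQwenForCausalLM.from_pretrained(
        "lmms-lab/LLaVA-Video-7B-Qwen2",
        trust_remote_code=True,
        torch_dtype=torch.float16,  # halves memory vs. the default float32 weights
        device_map="auto",          # lets accelerate place weights on available devices
    )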