new STT code

fukurou

the supreme coder
ADMIN
cmd
winget install ffmpeg
check that it installed OK:
ffmpeg -version

in a Python terminal:
pip install openai-whisper pyaudio numpy

(the wave module is part of Python's standard library, so there's no need to pip install it)
if pyaudio fails to build on Windows, you may also need:
pip install pipwin
pipwin install pyaudio
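
If the default mic isn't picked up, it can help to list the input devices PyAudio can see (a minimal check using only documented PyAudio calls):

Python:
import pyaudio

p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    if info.get("maxInputChannels", 0) > 0:  # input-capable devices only
        print(i, info["name"])
p.terminate()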

Python:
import whisper
import pyaudio
import numpy as np
import re
from threading import Event

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()


def calibrate_mic(stream, calibrate_seconds=2):
    """Auto-set silence threshold by sampling ambient noise"""
    print(f"Calibrating mic (stay silent for {calibrate_seconds}s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * calibrate_seconds)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    return max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk(stream, silence_threshold):
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(model, audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def main():
    print("Initializing...")
    model = whisper.load_model("base")  # or "tiny"

    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=None  # Auto-select default mic
    )

    # Calibrate microphone
    silence_threshold = calibrate_mic(stream)
    print(f"Silence threshold set to: {silence_threshold:.2f}")

    print("\nSpeak now (Press Ctrl+C to stop):")
    last_text = ""
    try:
        while not exit_event.is_set():
            audio_data = record_chunk(stream, silence_threshold)
            if audio_data:
                text = transcribe_chunk(model, audio_data)
                if text and text != last_text:
                    print(f"> {text}")
                    last_text = text
    except KeyboardInterrupt:
        pass
    finally:
        exit_event.set()
        stream.stop_stream()
        stream.close()
        p.terminate()
        print("\nStopped.")


if __name__ == "__main__":
    main()
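
A quick sanity check on the timing constants: at RATE=16000 and CHUNK=1024 the stream delivers about 15.6 chunks per second, so the hard-coded factors work out to these frame counts (pure arithmetic, no audio needed):

Python:
RATE, CHUNK = 16000, 1024
chunks_per_sec = RATE / CHUNK      # ~15.6 chunks per second
print(int(chunks_per_sec * 1.5))   # 23 -> ~1.5 s of silence ends a recording
print(int(chunks_per_sec * 0.5))   # 7  -> need more than ~0.5 s of speech to keep it
print(int(chunks_per_sec * 2))     # 31 -> ~2 s of ambient samples for calibration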
 

the living tribunal

Moderator
Staff member
moderator
With atexit cleanup
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Initialize PyAudio globally so it can be accessed for cleanup
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK
)

def calibrate_mic(calibrate_seconds=2):
    """Auto-set silence threshold by sampling ambient noise"""
    print(f"Calibrating mic (stay silent for {calibrate_seconds}s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * calibrate_seconds)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    return max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100

def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""

def record_chunk(silence_threshold):
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None

def transcribe_chunk(model, audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])

def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")

# Register cleanup function to be executed on exit
atexit.register(cleanup)

def main():
    print("Initializing...")
    model = whisper.load_model("base")  # or "tiny"

    silence_threshold = calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")

    print("\nSpeak now (Press Ctrl+C to stop):")
    last_text = ""

    try:
        while not exit_event.is_set():
            audio_data = record_chunk(silence_threshold)
            if audio_data:
                text = transcribe_chunk(model, audio_data)
                if text and text != last_text:
                    print(f"> {text}")
                    last_text = text
    except KeyboardInterrupt:
        pass

if __name__ == "__main__":
    main()
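
For reference, the repeat-collapsing regex in clean_text only merges immediate, case-sensitive repeats of the same word:

Python:
import re

pattern = r'(\b\w+\b)(?:\s+\1\b)+'
print(re.sub(pattern, r'\1', "the the the cat sat sat down"))  # -> the cat sat down
print(re.sub(pattern, r'\1', "The the cat"))  # unchanged: case differs, no merge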
 

the living tribunal

Moderator
Staff member
moderator
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Initialize PyAudio globally
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK
)

def calibrate_mic(calibrate_seconds=2):
    """Auto-set silence threshold by sampling ambient noise"""
    print(f"Calibrating mic (stay silent for {calibrate_seconds}s)...")
    samples = [np.abs(np.frombuffer(stream.read(CHUNK, exception_on_overflow=False), dtype=np.int16)).mean()
               for _ in range(int(RATE / CHUNK * calibrate_seconds))]
    return max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100

def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""

def record_chunk(silence_threshold):
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    for _ in range(10):  # Run for max 10 iterations
        if exit_event.is_set():
            break
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None

def transcribe_chunk(model, audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])

def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")

# Register cleanup function
atexit.register(cleanup)

def main():
    print("Initializing...")
    model = whisper.load_model("base")

    silence_threshold = calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")
    print("\nSpeak now (Press Ctrl+C to stop):")

    last_text = ""

    for _ in range(10):  # Try for a max of 10 iterations
        if exit_event.is_set():
            break
        audio_data = record_chunk(silence_threshold)
        if audio_data:
            text = transcribe_chunk(model, audio_data)
            if text and text != last_text:
                print(f"> {text}")
                last_text = text
                break  # Exit early if text is detected

if __name__ == "__main__":
    main()
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")


def speechReco():
    """Perform speech recognition"""
    print("\nSpeak now (Press Ctrl+C to stop):")
    last_text = ""
    try:
        while not exit_event.is_set():
            audio_data = record_chunk()
            if audio_data:
                text = transcribe_chunk(audio_data)
                if text and text != last_text:
                    print(f"> {text}")
                    last_text = text
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    initSTT()
    speechReco()
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")


def speechReco():
    """Perform speech recognition"""
    print("\nSpeak now (Press Ctrl+C to stop):")
    last_text = ""
    try:
        audio_data = record_chunk()
        if audio_data:
            text = transcribe_chunk(audio_data)
            print(f"> {text}")
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    initSTT()
    for _ in range(5):
        speechReco()
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain
from async_skills import ShorniSplash

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")

class DiSTT(ShorniSplash):
    def __init__(self, brain: Brain):
        super().__init__()
        initSTT()
        self.brain = brain
        self._botLastOutput = ""


    def trigger(self, ear, skin, eye) -> bool:
        """Triggers the skill (stub implementation)."""
        if len(self.brain.getLogicChobitOutput()) > 0:
            self._botLastOutput = self.brain.getLogicChobitOutput()
        return True

    @staticmethod
    def _async_func(this_cls):
        """Perform speech recognition"""
        print("\nSpeak now (Press Ctrl+C to stop):")
        last_text = ""
        try:
            audio_data = record_chunk()
            if audio_data:
                text = transcribe_chunk(audio_data)
                this_cls._result = f"{text.lower()}"

                # this_cls._result = f"{text}"
                print(f"> {text}")
        except KeyboardInterrupt:
            pass

    def output_result(self):
        if len(self._botLastOutput) == 0:
            print(f'input: {self._result}')
            self.setSimpleAlg(self._result)  # slower code version
        else:
            self._botLastOutput = ""
            print(f'ignoring: {self._result}')


    def skillNotes(self, param: str) -> str:
        """Provides notes for the skill."""
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain
from async_skills import ShorniSplash

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")

class DiSTT(ShorniSplash):
    skip = False
    def __init__(self, brain: Brain):
        super().__init__()
        initSTT()
        self.brain = brain


    def trigger(self, ear, skin, eye) -> bool:
        """Triggers the skill (stub implementation)."""
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping")
            DiSTT.skip = True
        return True

    @staticmethod
    def _async_func(this_cls):
        """Perform speech recognition"""
        print("\nSpeak now (Press Ctrl+C to stop):")
        try:
            audio_data = record_chunk()
            if audio_data:
                text = transcribe_chunk(audio_data)
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                if DiSTT.skip:
                    DiSTT.skip = False
                    print(f"ignoring> {text}")
                else:
                    this_cls._result = f"{cleaned_text}"
                    print(f"> {text}")
        except KeyboardInterrupt:
            pass

    def output_result(self):
        if len(self._result) > 0:
            print(f'input: {self._result}')
            self.setSimpleAlg(self._result)


    def skillNotes(self, param: str) -> str:
        """Provides notes for the skill."""
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain
from async_skills import ShorniSplash

# before running the code:
'''
cmd
winget install ffmpeg
check that it installed OK:
ffmpeg -version

in a Python terminal:
pip install openai-whisper pyaudio numpy
'''

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(audio_data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")

class DiSTT(ShorniSplash):
    skip = False
    def __init__(self, brain: Brain):
        super().__init__()
        initSTT()
        self.brain = brain


    def trigger(self, ear, skin, eye) -> bool:
        """Triggers the skill (stub implementation)."""
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping")
            DiSTT.skip = True
        return True

    @staticmethod
    def _async_func(this_cls):
        """Perform speech recognition"""
        print("\nSpeak now (Press Ctrl+C to stop):")
        try:
            audio_data = record_chunk()
            if audio_data:
                text = transcribe_chunk(audio_data)
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                if DiSTT.skip:
                    DiSTT.skip = False
                    print(f"ignoring> {text}")
                else:
                    this_cls._result = f"{cleaned_text}"
                    print(f"> {text}")
        except KeyboardInterrupt:
            pass

    def output_result(self):
        if len(self._result) > 0:
            print(f'input: {self._result}')
            self.setSimpleAlg(self._result)


    def skillNotes(self, param: str) -> str:
        """Provides notes for the skill."""
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 

fukurou

the supreme coder
ADMIN
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain
from async_skills import ShorniSplash

# before running the code:
'''
cmd
winget install ffmpeg
check that it installed OK:
ffmpeg -version

in a Python terminal:
pip install openai-whisper pyaudio numpy
'''

# Audio Settings
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MIN_ACTIVE_SECONDS = 0.5  # Minimum speech duration to process
exit_event = Event()

# Global variables
model = whisper.load_model("base")  # or "tiny"
p = pyaudio.PyAudio()
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
    input_device_index=None  # Auto-select default mic
)
silence_threshold = None


def cleanup():
    """Ensure resources are freed properly when the program exits."""
    print("\nCleaning up resources...")
    exit_event.set()
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Cleanup complete. Exiting.")


# Register cleanup function to be executed on exit
atexit.register(cleanup)


def calibrate_mic():
    """Auto-set silence threshold by sampling ambient noise"""
    print("Calibrating mic (stay silent for 2s)...")
    samples = []
    for _ in range(int(RATE / CHUNK * 2)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
    global silence_threshold
    silence_threshold = max(np.mean(samples) * 1.5, 100)  # Ensure minimum threshold of 100


def clean_text(text):
    """Remove repeated phrases and gibberish"""
    text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)  # Remove repeats
    return text.strip() if text.strip() and len(text.split()) >= 1 else ""


def record_chunk():
    """Record until silence is detected"""
    frames = []
    silent_frames = 0
    max_silent_frames = int(RATE / CHUNK * 1.5)  # 1.5s silence = stop

    while not exit_event.is_set():
        data = stream.read(CHUNK, exception_on_overflow=False)
        audio_data = np.frombuffer(data, dtype=np.int16)
        volume = np.abs(audio_data).mean()

        if volume < silence_threshold:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                break
        else:
            silent_frames = 0
            frames.append(data)

    return b''.join(frames) if len(frames) > int(RATE / CHUNK * MIN_ACTIVE_SECONDS) else None


def transcribe_chunk(audio_bytes):
    """Convert audio bytes to text"""
    audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
    result = model.transcribe(audio_np, fp16=False, language='en')
    return clean_text(result["text"])


def initSTT():
    """Initialize STT system"""
    print("Initializing...")
    calibrate_mic()
    print(f"Silence threshold set to: {silence_threshold:.2f}")

class DiSTT(ShorniSplash):
    skip = False
    def __init__(self, brain: Brain):
        super().__init__()
        initSTT()
        self.brain = brain


    def trigger(self, ear, skin, eye) -> bool:
        """Triggers the skill (stub implementation)."""
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping")
            DiSTT.skip = True
        return True

    @staticmethod
    def _async_func(this_cls):
        """Perform speech recognition"""
        print("\nSpeak now (Press Ctrl+C to stop):")
        try:
            audio_data = record_chunk()
            if audio_data:
                text = transcribe_chunk(audio_data)
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                if DiSTT.skip:
                    DiSTT.skip = False
                    print(f"ignoring> {text}")
                else:
                    this_cls._result = f"{cleaned_text}"
                    print(f"> {text}")
        except KeyboardInterrupt:
            pass

    def output_result(self):
        if len(self._result) > 0:
            print(f'input: {self._result}')
            self.setSimpleAlg(self._result)


    def skillNotes(self, param: str) -> str:
        """Provides notes for the skill."""
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 

fukurou

the supreme coder
ADMIN
1 piece version

Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain
from async_skills import ShorniSplash

"""
cmd
winget install ffmpeg
check if it installed ok:
ffmpeg -version

in python terminal:
pip install openai-whisper pyaudio numpy wave
"""

class DiSTT(ShorniSplash):
    # All original global variables moved here as class variables
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    MIN_ACTIVE_SECONDS = 0.5
    exit_event = Event()
    model = whisper.load_model("base")
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=None
    )
    silence_threshold = None
    skip = False
    processing = False

    def __init__(self, brain: Brain):
        super().__init__()
        DiSTT.initSTT()  # Call static method
        self.brain = brain
        atexit.register(DiSTT.cleanup)  # Register static method

    # All original functions converted to static methods with identical logic
    @staticmethod
    def cleanup():
        print("\nCleaning up resources...")
        DiSTT.exit_event.set()
        DiSTT.stream.stop_stream()
        DiSTT.stream.close()
        DiSTT.p.terminate()
        print("Cleanup complete. Exiting.")

    @staticmethod
    def calibrate_mic():
        print("Calibrating mic (stay silent for 2s)...")
        samples = []
        for _ in range(int(DiSTT.RATE / DiSTT.CHUNK * 2)):
            data = DiSTT.stream.read(DiSTT.CHUNK, exception_on_overflow=False)
            samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
        DiSTT.silence_threshold = max(np.mean(samples) * 1.5, 100)

    @staticmethod
    def clean_text(text):
        text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)
        return text.strip() if text.strip() and len(text.split()) >= 1 else ""

    @staticmethod
    def record_chunk():
        frames = []
        silent_frames = 0
        max_silent_frames = int(DiSTT.RATE / DiSTT.CHUNK * 1.5)

        while not DiSTT.exit_event.is_set():
            data = DiSTT.stream.read(DiSTT.CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.int16)
            volume = np.abs(audio_data).mean()

            if volume < DiSTT.silence_threshold:
                silent_frames += 1
                if silent_frames > max_silent_frames:
                    break
            else:
                silent_frames = 0
                frames.append(data)

        return b''.join(frames) if len(frames) > int(DiSTT.RATE / DiSTT.CHUNK * DiSTT.MIN_ACTIVE_SECONDS) else None

    @staticmethod
    def transcribe_chunk(audio_bytes):
        audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        result = DiSTT.model.transcribe(audio_np, fp16=False, language='en')
        return DiSTT.clean_text(result["text"])

    @staticmethod
    def initSTT():
        print("Initializing...")
        DiSTT.calibrate_mic()
        print(f"Silence threshold set to: {DiSTT.silence_threshold:.2f}")

    # Original instance methods remain exactly the same
    def trigger(self, ear, skin, eye) -> bool:
        if DiSTT.processing:
            print("waiting to finish summoned async")
            return False
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping")
            DiSTT.skip = True
        return True

    @staticmethod
    def _async_func(this_cls):
        """This remains EXACTLY as in the original code"""
        print("\nSpeak now (Press Ctrl+C to stop):")
        try:
            audio_data = DiSTT.record_chunk()  # Calls static method
            if audio_data:
                text = DiSTT.transcribe_chunk(audio_data)  # Calls static method
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                if DiSTT.skip:
                    DiSTT.skip = False
                    print(f"ignoring> {text}")
                else:
                    this_cls._result = f"{cleaned_text}"
                    print(f"> {text}")
        except KeyboardInterrupt:
            pass
        finally:
            print("finished processing")
            DiSTT.processing = False

    def output_result(self):
        if len(self._result) > 0:
            print(f'input: {self._result}')
            self.setSimpleAlg(self._result)

    def skillNotes(self, param: str) -> str:
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 

fukurou

the supreme coder
ADMIN
the sync version seems more reliable:
Python:
class DiSTTSync(Skill):
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    MIN_ACTIVE_SECONDS = 0.5
    exit_event = Event()
    model = whisper.load_model("base")
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=None
    )
    silence_threshold = None
    skip = False
    processing = False

    def __init__(self, brain: Brain):
        super().__init__()
        DiSTTSync.initSTT()  # Call static method
        self.brain = brain
        atexit.register(DiSTTSync.cleanup)  # Register static method

    # All original functions converted to static methods with identical logic
    @staticmethod
    def cleanup():
        print("\nCleaning up resources...")
        DiSTTSync.exit_event.set()
        DiSTTSync.stream.stop_stream()
        DiSTTSync.stream.close()
        DiSTTSync.p.terminate()
        print("Cleanup complete. Exiting.")

    @staticmethod
    def calibrate_mic():
        print("Calibrating mic (stay silent for 2s)...")
        samples = []
        for _ in range(int(DiSTTSync.RATE / DiSTTSync.CHUNK * 2)):
            data = DiSTTSync.stream.read(DiSTTSync.CHUNK, exception_on_overflow=False)
            samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
        DiSTTSync.silence_threshold = max(np.mean(samples) * 1.5, 100)

    @staticmethod
    def clean_text(text):
        text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)
        return text.strip() if text.strip() and len(text.split()) >= 1 else ""

    @staticmethod
    def record_chunk():
        frames = []
        silent_frames = 0
        max_silent_frames = int(DiSTTSync.RATE / DiSTTSync.CHUNK * 1.5)

        while not DiSTTSync.exit_event.is_set():
            data = DiSTTSync.stream.read(DiSTTSync.CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.int16)
            volume = np.abs(audio_data).mean()

            if volume < DiSTTSync.silence_threshold:
                silent_frames += 1
                if silent_frames > max_silent_frames:
                    break
            else:
                silent_frames = 0
                frames.append(data)

        return b''.join(frames) if len(frames) > int(DiSTTSync.RATE / DiSTTSync.CHUNK * DiSTTSync.MIN_ACTIVE_SECONDS) else None

    @staticmethod
    def transcribe_chunk(audio_bytes):
        audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        result = DiSTTSync.model.transcribe(audio_np, fp16=False, language='en')
        return DiSTTSync.clean_text(result["text"])

    @staticmethod
    def initSTT():
        print("Initializing...")
        DiSTTSync.calibrate_mic()
        print(f"Silence threshold set to: {DiSTTSync.silence_threshold:.2f}")

    def input(self, ear: str, skin: str, eye: str):
        print("\nSpeak now")
        try:
            audio_data = DiSTTSync.record_chunk()  # Calls static method
            if audio_data:
                text = DiSTTSync.transcribe_chunk(audio_data)  # Calls static method
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                if DiSTTSync.skip:
                    DiSTTSync.skip = False
                    print(f"ignoring> {text}")
                else:
                    print(f"{cleaned_text}")
                    print(f"> {text}")

        except KeyboardInterrupt:
            pass
        finally:
            print("finished processing")
            DiSTTSync.processing = False


    def skillNotes(self, param: str) -> str:
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"

needs more work tho
 

fukurou

the supreme coder
ADMIN
Python:
class DiSTTSync(Skill):
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    MIN_ACTIVE_SECONDS = 0.5
    exit_event = Event()
    model = whisper.load_model("base")
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=None
    )
    silence_threshold = None

    def __init__(self, brain: Brain):
        super().__init__()
        DiSTTSync.initSTT()  # Call static method
        self.brain = brain
        atexit.register(DiSTTSync.cleanup)  # Register static method

    # All original functions converted to static methods with identical logic
    @staticmethod
    def cleanup():
        print("\nCleaning up resources...")
        DiSTTSync.exit_event.set()
        DiSTTSync.stream.stop_stream()
        DiSTTSync.stream.close()
        DiSTTSync.p.terminate()
        print("Cleanup complete. Exiting.")

    @staticmethod
    def calibrate_mic():
        print("Calibrating mic (stay silent for 2s)...")
        samples = []
        for _ in range(int(DiSTTSync.RATE / DiSTTSync.CHUNK * 2)):
            data = DiSTTSync.stream.read(DiSTTSync.CHUNK, exception_on_overflow=False)
            samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
        DiSTTSync.silence_threshold = max(np.mean(samples) * 1.5, 100)

    @staticmethod
    def clean_text(text):
        text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)
        return text.strip() if text.strip() and len(text.split()) >= 1 else ""

    @staticmethod
    def record_chunk():
        frames = []
        silent_frames = 0
        max_silent_frames = int(DiSTTSync.RATE / DiSTTSync.CHUNK * 1.5)

        while not DiSTTSync.exit_event.is_set():
            data = DiSTTSync.stream.read(DiSTTSync.CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.int16)
            volume = np.abs(audio_data).mean()

            if volume < DiSTTSync.silence_threshold:
                silent_frames += 1
                if silent_frames > max_silent_frames:
                    break
            else:
                silent_frames = 0
                frames.append(data)

        return b''.join(frames) if len(frames) > int(DiSTTSync.RATE / DiSTTSync.CHUNK * DiSTTSync.MIN_ACTIVE_SECONDS) else None

    @staticmethod
    def transcribe_chunk(audio_bytes):
        audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        result = DiSTTSync.model.transcribe(audio_np, fp16=False, language='en')
        return DiSTTSync.clean_text(result["text"])

    @staticmethod
    def initSTT():
        print("Initializing...")
        DiSTTSync.calibrate_mic()
        print(f"Silence threshold set to: {DiSTTSync.silence_threshold:.2f}")

    def input(self, ear: str, skin: str, eye: str):
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping listen")
            return
        print("\nSpeak now")
        try:
            audio_data = DiSTTSync.record_chunk()  # Calls static method
            if audio_data:
                text = DiSTTSync.transcribe_chunk(audio_data)  # Calls static method
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                self.setSimpleAlg(f"{cleaned_text}")
                print(f"> {text}")

        except KeyboardInterrupt:
            pass
        finally:
            print("finished processing")


    def skillNotes(self, param: str) -> str:
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"

works ok but could use a skill to shut off the program
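
A rough sketch of such a shutoff skill, assuming recognized text reaches other skills through the ear parameter and reusing the Skill base class from above (the trigger phrase and the sys.exit call are assumptions, not part of the code above):

Python:
import sys
from LivinGrimoire23 import Skill

class DiKillSwitch(Skill):
    """Shuts the program down on a spoken trigger phrase."""
    def input(self, ear: str, skin: str, eye: str):
        if ear == "shut down":
            self.setSimpleAlg("shutting down")
            DiSTTSync.exit_event.set()  # stop any in-progress recording loop
            sys.exit(0)                 # atexit then runs the registered cleanup()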
 

fukurou

the supreme coder
ADMIN
premode:
Python:
import whisper
import pyaudio
import numpy as np
import re
import atexit
from threading import Event

from LivinGrimoire23 import Brain, Skill

"""
cmd
winget install ffmpeg
check if it installed ok:
ffmpeg -version

in python terminal:
pip install openai-whisper pyaudio numpy wave
"""

class DiSTT(Skill):
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    MIN_ACTIVE_SECONDS = 0.5
    exit_event = Event()
    model = whisper.load_model("base")
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=None
    )
    silence_threshold = None

    def __init__(self, brain: Brain):
        super().__init__()
        DiSTT.initSTT()  # Call static method
        self.brain = brain
        atexit.register(DiSTT.cleanup)  # Register static method

    # All original functions converted to static methods with identical logic
    @staticmethod
    def cleanup():
        print("\nCleaning up resources...")
        DiSTT.exit_event.set()
        DiSTT.stream.stop_stream()
        DiSTT.stream.close()
        DiSTT.p.terminate()
        print("Cleanup complete. Exiting.")

    @staticmethod
    def calibrate_mic():
        print("Calibrating mic (stay silent for 2s)...")
        samples = []
        for _ in range(int(DiSTT.RATE / DiSTT.CHUNK * 2)):
            data = DiSTT.stream.read(DiSTT.CHUNK, exception_on_overflow=False)
            samples.append(np.abs(np.frombuffer(data, dtype=np.int16)).mean())
        DiSTT.silence_threshold = max(np.mean(samples) * 1.5, 100)

    @staticmethod
    def clean_text(text):
        text = re.sub(r'(\b\w+\b)(?:\s+\1\b)+', r'\1', text)
        return text.strip() if text.strip() and len(text.split()) >= 1 else ""

    @staticmethod
    def record_chunk():
        frames = []
        silent_frames = 0
        max_silent_frames = int(DiSTT.RATE / DiSTT.CHUNK * 1.5)

        while not DiSTT.exit_event.is_set():
            data = DiSTT.stream.read(DiSTT.CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.int16)
            volume = np.abs(audio_data).mean()

            if volume < DiSTT.silence_threshold:
                silent_frames += 1
                if silent_frames > max_silent_frames:
                    break
            else:
                silent_frames = 0
                frames.append(data)

        return b''.join(frames) if len(frames) > int(DiSTT.RATE / DiSTT.CHUNK * DiSTT.MIN_ACTIVE_SECONDS) else None

    @staticmethod
    def transcribe_chunk(audio_bytes):
        audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        result = DiSTT.model.transcribe(audio_np, fp16=False, language='en')
        return DiSTT.clean_text(result["text"])

    @staticmethod
    def initSTT():
        print("Initializing...")
        DiSTT.calibrate_mic()
        print(f"Silence threshold set to: {DiSTT.silence_threshold:.2f}")

    def input(self, ear: str, skin: str, eye: str):
        if len(self.brain.getLogicChobitOutput()) > 0:
            print("skipping listen")
            return
        print("\nSpeak now")
        try:
            audio_data = DiSTT.record_chunk()  # Calls static method
            if audio_data:
                text = DiSTT.transcribe_chunk(audio_data)  # Calls static method
                cleaned_text = re.sub(r'[^\w\s]', '', text.lower())
                self.setSimpleAlg(f"{cleaned_text}")
                print(f"> {text}")

        except KeyboardInterrupt:
            pass
        finally:
            print("finished processing")


    def skillNotes(self, param: str) -> str:
        if param == "notes":
            return "speech to text"
        elif param == "triggers":
            return "automatic and continuous"
        return "note unavailable"
 