parent 2b3ef31d8d
commit 1e28611c0a
@@ -0,0 +1,166 @@
import path from 'path';
import fs from 'fs';
import axios from "axios";
import { EventEmitter } from 'events';
// type-only import: the runtime module is loaded dynamically in LLamaBackend.getInstance
import type { LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions } from "node-llama-cpp";

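// LLamaBackend lazily loads node-llama-cpp and caches a single chat session; LLamaInferencer sets modelPath before calling getInstance().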
class LLamaBackend {
  static instance: any
  static model: any
  static modelPath: string

  static async getInstance() {
    if (this.instance === null || this.instance === undefined) {
      // dynamic import via Function() so the ESM package is not transpiled into a require() call
      const LlamaApi = Function('return import("node-llama-cpp")')();
      const { LlamaModel, LlamaContext, LlamaChatSession } = await LlamaApi;

      const getModelOptions = () => {
        const options = {
          modelPath: this.modelPath ? this.modelPath : null,
          threads: 1,
          temperature: 0.6,
          topK: 40,
          topP: 0.92,
          logitsAll: false,
          vocabOnly: false,
          useMmap: false,
          useMlock: false,
          embedding: false,
        };
        return options;
      }

      console.log('loading model with options', getModelOptions())
      const m = new LlamaModel(getModelOptions());

      console.log("system info\n", LlamaModel.systemInfo)
      console.log("model info\n", m.modelInfo)

      const context = new LlamaContext({ model: m });
      const session = new LlamaChatSession({ context });
      this.instance = session
      return this.instance
    }
    return this.instance
  }

}

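// LLamaInferencer downloads the selected model into the user's workspace and exposes completion entry points backed by LLamaBackend.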
export class LLamaInferencer {
  plugin: any
  isReady: boolean = false
  selectedModel: any
  modelPath: string
  event: EventEmitter
  inferencer: any

  constructor(props, model) {
    this.plugin = props
    this.selectedModel = model
    this.event = new EventEmitter()
  }

  async init(envPath?: string) {
    try {
      await this._downloadModel(this.selectedModel)

      if (this.modelPath === undefined) {
        console.log('Model not downloaded or not found')
        return
      }

      console.log('Model downloaded at', this.modelPath)

      LLamaBackend.model = this.selectedModel
      LLamaBackend.modelPath = this.modelPath
      this.inferencer = await LLamaBackend.getInstance()
      await this.inferencer.init()
      this.isReady = this.inferencer.initialized
    } catch (error) {
      console.log('Error initializing the model', error)
    }
  }

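  // Streams the model file from its download URL into a user-selected folder, emitting 'download' progress events.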
  async _downloadModel(model): Promise<void> {
    console.log('Downloading model', model)
    console.log('Downloading model from', model.downloadUrl)

    const wdir = await this.plugin.call('fs' as any, 'getWorkingDir');
    console.log('working dir is', wdir)
    const outputLocationDir = await this.plugin.call('fs' as any, 'selectFolder', wdir);
    console.log('output location dir is', outputLocationDir)

    if (outputLocationDir === undefined) {
      console.log('No output location selected');
      return;
    }

    const outputLocationPath = path.join(outputLocationDir, model.modelName);
    console.log('output location path is', outputLocationPath)
    if (fs.existsSync(outputLocationPath)) {
      this.modelPath = outputLocationPath
      console.log('Model already exists in the output location', outputLocationPath);
      return;
    }

    // Make a HEAD request to get the file size
    const { headers } = await axios.head(model.downloadUrl);
    const totalSize = parseInt(headers['content-length'], 10);

    // Create a write stream to save the file
    const writer = fs.createWriteStream(outputLocationPath);

    // Start the file download
    const response = await axios({
      method: 'get',
      url: model.downloadUrl,
      responseType: 'stream'
    });

    let downloadedSize = 0;

    response.data.on('data', (chunk: Buffer) => {
      downloadedSize += chunk.length;
      const progress = ((downloadedSize / totalSize) * 100).toFixed(2);
      console.log(`Downloaded ${progress}%`);
      this.event.emit('download', progress);
    });

    response.data.pipe(writer);

    // Only report completion once the file has actually been flushed to disk
    return new Promise((resolve, reject) => {
      writer.on('finish', () => {
        this.modelPath = outputLocationPath
        this.event.emit('ready')
        console.log('LLama Download complete');
        resolve();
      });
      writer.on('error', reject);
    });
  }

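  // Runs a plain completion against the chat session; no prompt template is applied yet.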
  async code_completion(context: any, params): Promise<any> {
    if (!this.isReady) {
      console.log('model not ready yet')
      return
    }

    // as of now no prompt required
    this.event.emit('onInference')
    const result = await this.inferencer.promptWithMeta(context)
    this.event.emit('onInferenceDone')
    return result
  }

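  // Fill-in-the-middle insertion is not wired to the llama backend yet; a transformer-based flow is kept below for reference.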
  async code_insertion(msg_pfx: string, msg_sfx: string, params): Promise<any> {
    if (!this.isReady) {
      console.log('model not ready yet')
      return
    }

    this.event.emit('onInference')
    // const prompt = getInsertionPrompt(InlineCompletionTransformer.model, msg_pfx, msg_sfx)
    // const instance = await InlineCompletionTransformer.getInstance()
    // const result = instance(prompt, insertionParams)
    // this.event.emit('onInferenceDone')
    // return result
  }
}