parent
2b3ef31d8d
commit
1e28611c0a
@@ -0,0 +1,166 @@
import path from 'path';
import fs from 'fs';
import axios from "axios";
import { EventEmitter } from 'events';
import { LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions } from "node-llama-cpp";
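
// Holds one shared node-llama-cpp chat session for the whole process, so the
// model weights are loaded a single time and reused by every inference call.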
class LLamaBackend {
  static instance: any
  static model: any
  static modelPath: string

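  // Lazily creates the chat session on first use; later calls return the cached
  // instance so the model file is only loaded once.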
  static async getInstance() {
    if (this.instance === null || this.instance === undefined) {
      // node-llama-cpp is ESM-only; wrapping the dynamic import in Function() keeps
      // the transpiler from rewriting it into a CommonJS require.
      const LlamaApi = Function('return import("node-llama-cpp")')();
      const { LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions } = await LlamaApi;

      const getModelOptions = () => {
        const options = {
          modelPath: this.modelPath ? this.modelPath : null,
          threads: 1,
          temperature: 0.6,
          topK: 40,
          topP: 0.92,
          logitsAll: false,
          vocabOnly: false,
          useMmap: false,
          useMlock: false,
          embedding: false,
        };
        return options;
      }

      console.log('loading model with options', getModelOptions())
      const m = new LlamaModel(getModelOptions());

      console.log("system infos\n", LlamaModel.systemInfo)
      console.log("model infos\n", m.modelInfo)
      const context = new LlamaContext({ model: m });
      const session = new LlamaChatSession({ context });
      this.instance = session
      return this.instance
    }
    return this.instance
  }

}
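
// Drives the backend from the host plugin: downloads the selected model file,
// initializes the shared session and exposes the completion entry points.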
export class LLamaInferencer {
  plugin: any
  isReady: boolean = false
  selectedModel: any
  modelPath: string
  event: EventEmitter
  inferencer: any

  constructor(props, model) {
    this.plugin = props
    this.selectedModel = model
    this.event = new EventEmitter()
  }

  async init(envPath?: string) {
    try {
      await this._downloadModel(this.selectedModel)

      if (this.modelPath === undefined) {
        console.log('Model not downloaded or not found')
        return
      }

      console.log('Model downloaded at', this.modelPath)

      LLamaBackend.model = this.selectedModel
      LLamaBackend.modelPath = this.modelPath
      this.inferencer = await LLamaBackend.getInstance()
      // Wait for the session to finish initializing before flagging it as ready.
      await this.inferencer.init()
      this.isReady = this.inferencer.initialized
    } catch (error) {
      console.log('Error initializing the model', error)
    }
  }

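  // Streams the model file into a user-selected folder, emitting 'download'
  // progress events, and records the local path; skips the download if the file
  // already exists.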
  async _downloadModel(model): Promise<void> {
    console.log('Downloading model', model)
    console.log('Downloading model from', model.downloadUrl)

    const wdir = await this.plugin.call('fs' as any, 'getWorkingDir');
    console.log('working dir is', wdir)
    const outputLocationDir = await this.plugin.call('fs' as any, 'selectFolder', wdir);
    console.log('output location dir is', outputLocationDir)

    if (outputLocationDir === undefined) {
      console.log('No output location selected');
      return;
    }

    const outputLocationPath = path.join(outputLocationDir, model.modelName);
    console.log('output location path is', outputLocationPath)
    if (fs.existsSync(outputLocationPath)) {
      this.modelPath = outputLocationPath
      console.log('Model already exists in the output location', outputLocationPath);
      return;
    }

    // Make a HEAD request to get the file size
    const { headers } = await axios.head(model.downloadUrl);
    const totalSize = parseInt(headers['content-length'], 10);

    // Create a write stream to save the file
    const writer = fs.createWriteStream(outputLocationPath);

    // Start the file download
    const response = await axios({
      method: 'get',
      url: model.downloadUrl,
      responseType: 'stream'
    });

    let downloadedSize = 0;

    response.data.on('data', (chunk: Buffer) => {
      downloadedSize += chunk.length;
      const progress = (Number((downloadedSize / totalSize) * 100).toFixed(2));
      console.log(`Downloaded ${progress}%`);
      this.event.emit('download', progress);
    });

    response.data.pipe(writer);

    // Resolve only once the whole file has been written, then record the path and report readiness.
    await new Promise<void>((resolve, reject) => {
      writer.on('finish', resolve);
      writer.on('error', reject);
    });

    this.modelPath = outputLocationPath
    this.event.emit('ready')
    console.log('LLama Download complete');
  }

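  // Runs a plain completion against the shared session and mirrors the call with
  // 'onInference' / 'onInferenceDone' events for the UI.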
  async code_completion(context: any, params): Promise<any> {
    if (!this.isReady) {
      console.log('model not ready yet')
      return
    }

    // as of now no prompt required
    this.event.emit('onInference')
    const result = await this.inferencer.promptWithMeta(context)
    this.event.emit('onInferenceDone')
    return result
  }

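  // Intended for fill-in-the-middle insertion; the actual session call is still
  // commented out, so this currently only emits the 'onInference' event.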
  async code_insertion(msg_pfx: string, msg_sfx: string, params): Promise<any> {
    if (!this.isReady) {
      console.log('model not ready yet')
      return
    }

    this.event.emit('onInference')
    // const prompt = getInsertionPrompt(InlineCompletionTransformer.model, msg_pfx, msg_sfx)
    // const instance = await InlineCompletionTransformer.getInstance()
    // const result = instance(prompt, insertionParams)
    // this.event.emit('onInferenceDone')
    // return result
  }
}