please make this code efficient

Question

1.86/5 (4 votes)

See more:

This is similarity-matching code that computes the similarity between authors' paper titles and the titles of clusters (queries), but it runs very slowly. Kindly help me optimize it; only the Main() function needs to change. Please help.

C#

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Linq;
namespace VectorSpaceModel
{
    class Program
    {
        // Document-term vectors: key is "Query" or the document index as a string,
        // value is that text's double[] of TF-IDF weights (one entry per wordlist term).
        static Hashtable DTVector = new Hashtable();
        // Vocabulary: every distinct term found in the query and the documents.
        static List<string> wordlist = new List<string>();
        // Documents ranked by VSM with angle value. Not referenced by the code in this file.
        // NOTE(review): the value type was lost in the original listing; string is assumed - confirm.
        static Dictionary<double, string> sortedList1 = new Dictionary<double, string>();
        // Document key -> cosine similarity with the current query (filled by classify()).
        static Dictionary<string, double> sortedList = new Dictionary<string, double>();
        // Slot 0 holds the current query; slots 1..N hold the document titles.
        static string[] docs = new string[37406];
    

        /// <summary>
        /// Loads the "author=title" lines into <c>docs</c>, then scores every query
        /// line against them and appends the ranked matches to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string fileName2 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_titles.txt";
            string fileName3 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\queries.txt";
            string fileName4 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_vsm.txt";

            int num = 0;
            // Text before the '=' of each line, index-aligned with docs.
            string[] authors = new string[docs.Length];

            using (System.IO.StreamReader read_author = new System.IO.StreamReader(fileName2))
            {
                string line;
                int j = 1; // slot 0 of docs is reserved for the current query
                while ((line = read_author.ReadLine()) != null)
                {
                    // Split on the first '=' only, so a title that itself contains '=' is kept whole.
                    string[] array = line.Split(new[] { '=' }, 2);
                    if (array.Length < 2)
                    {
                        Console.Error.WriteLine("Skipping line without '=': {0}", line);
                        continue;
                    }

                    if (j >= docs.Length)
                    {
                        Console.Error.WriteLine("Document limit of {0} reached; remaining lines ignored.", docs.Length - 1);
                        break;
                    }

                    Console.WriteLine(j);
                    authors[j] = array[0];
                    docs[j] = array[1];
                    j++;
                }
            }

            // The output file is opened once (append mode) instead of once per query.
            using (System.IO.StreamReader sr1 = new System.IO.StreamReader(fileName3))
            using (System.IO.StreamWriter writer = new System.IO.StreamWriter(fileName4, true))
            {
                string line2;
                while ((line2 = sr1.ReadLine()) != null)
                {
                    docs[0] = line2;
                    num++;
                    createWordList();
                    createVector();
                    classify();

                    writer.WriteLine(num);
                    writer.WriteLine(line2);

                    // Dictionary enumeration order is unspecified, so reversing it does not
                    // rank anything; sort explicitly, best match first.
                    foreach (var x in sortedList.OrderByDescending(entry => entry.Value))
                    {
                        Console.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                        writer.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                    }
                    writer.WriteLine("");
                    // Keep finished queries on disk even if a later query fails.
                    writer.Flush();

                    // Reset the per-query state before the next query.
                    DTVector.Clear();
                    wordlist.Clear();
                    sortedList.Clear();
                }
            }
        }


        /// <summary>
        /// Extends the shared vocabulary with the terms of every entry in <c>docs</c>
        /// by passing each entry through <c>getWordList</c>.
        /// </summary>
        public static void createWordList()
        {
            for (int index = 0; index < docs.Length; index++)
            {
                string text = docs[index];
                wordlist = getWordList(wordlist, text);
            }
        }

        /// <summary>
        /// Appends to <paramref name="wordlist"/> every <c>\w+</c> token of
        /// <paramref name="query"/> that the list does not already contain.
        /// </summary>
        /// <param name="wordlist">Vocabulary collected so far; modified in place.</param>
        /// <param name="query">Text to tokenise; null or empty contributes no terms.</param>
        /// <returns>The same <paramref name="wordlist"/> instance.</returns>
        public static List<string> getWordList(List<string> wordlist, string query)
        {
            // Unfilled slots of the fixed-size document array are null; treat them as empty text.
            if (string.IsNullOrEmpty(query))
            {
                return wordlist;
            }

            // The static Regex.Matches overload reuses the framework's cached pattern,
            // so no Regex object is constructed for every document.
            foreach (Match match in Regex.Matches(query, "\\w+", RegexOptions.IgnoreCase))
            {
                string term = match.Value;
                // Membership test is case-sensitive ("Model" and "model" are distinct terms).
                if (!wordlist.Contains(term))
                {
                    wordlist.Add(term);
                }
            }

            return wordlist;
        }

        /// <summary>
        /// Builds one TF-IDF vector per entry of <c>docs</c> (one component per
        /// <c>wordlist</c> term) and stores it in <c>DTVector</c> under "Query" for
        /// slot 0 or the slot index as a string for every other slot.
        /// </summary>
        public static void createVector()
        {
            int termCount = wordlist.Count;

            // IDF depends only on the term, not on the document being vectorised,
            // so it is computed once per term instead of once per (document, term) pair.
            double[] idf = new double[termCount];
            for (int i = 0; i < termCount; i++)
            {
                idf[i] = getIDF(wordlist[i]);
            }

            for (int j = 0; j < docs.Length; j++)
            {
                string doc = docs[j];
                double[] queryvector = new double[termCount]; // components default to 0.0

                for (int i = 0; i < termCount; i++)
                {
                    string term = wordlist[i];

                    // The weight is 0 when the slot is unfilled, when the IDF is exactly 0,
                    // or when the term is not even a substring of the text (it then cannot
                    // be one of its tokens), so getTF is only called when it can matter.
                    if (doc != null && idf[i] != 0 && doc.Contains(term))
                    {
                        queryvector[i] = getTF(doc, term) * idf[i];
                    }
                }

                // Slot 0 is the query; every other slot is keyed by its index.
                DTVector.Add(j == 0 ? "Query" : j.ToString(), queryvector);
            }
        }

        /// <summary>
        /// Compares the "Query" vector in <c>DTVector</c> with every other stored
        /// vector via <c>cosinetheta</c> and records each non-zero result in
        /// <c>sortedList</c>, keyed by the vector's key.
        /// </summary>
        public static void classify()
        {
            int dimension = wordlist.Count;

            // Work on a private copy of the query vector.
            double[] queryvector = new double[dimension];
            Array.Copy((double[])DTVector["Query"], queryvector, dimension);

            foreach (DictionaryEntry entry in DTVector)
            {
                string key = entry.Key.ToString();
                if (key == "Query")
                {
                    continue;
                }

                double score = cosinetheta(queryvector, (double[])entry.Value);
                if (score != 0)
                {
                    sortedList.Add(key, score);
                }
            }
        }

        /// <summary>
        /// Returns the sum of the component-wise products of two vectors.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector.</param>
        /// <returns>The dot product, or 0 when the vectors differ in length.</returns>
        public static double dotproduct(double[] v1, double[] v2)
        {
            int size = v1.Length;
            if (size != v2.Length)
            {
                return 0.0;
            }

            double sum = 0.0;
            int k = 0;
            while (k < size)
            {
                sum += v1[k] * v2[k];
                k++;
            }

            return sum;
        }

        /// <summary>
        /// Returns the Euclidean length of a vector: the square root of the sum of
        /// its squared components.
        /// </summary>
        /// <param name="vector">Vector to measure.</param>
        /// <returns>The vector's length; 0 for an empty vector.</returns>
        public static double vectorlength(double[] vector)
        {
            double sumOfSquares = 0.0;
            foreach (double component in vector)
            {
                sumOfSquares += Math.Pow(component, 2);
            }

            return Math.Sqrt(sumOfSquares);
        }
        /// <summary>
        /// Counts how many times <paramref name="term"/> occurs as a whole token in
        /// <paramref name="document"/>.
        /// </summary>
        /// <param name="document">Text to search; null or empty yields 0.</param>
        /// <param name="term">Term to count (case-sensitive); null or empty yields 0.</param>
        /// <returns>The raw occurrence count.</returns>
        private static double getTF(string document, string term)
        {
            if (string.IsNullOrEmpty(document) || string.IsNullOrEmpty(term))
            {
                return 0;
            }

            double count = 0;

            // Tokenise with the same \w+ rule getWordList uses to build the vocabulary.
            // Splitting on whitespace left punctuation attached ("mining," never equals
            // "mining"), so such terms always scored 0.
            foreach (Match token in Regex.Matches(document, "\\w+"))
            {
                if (token.Value == term)
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Computes the inverse document frequency of <paramref name="term"/> over the
        /// documents in <c>docs</c>, skipping slot 0 (the query): ln(D / df).
        /// </summary>
        /// <param name="term">Term to look up.</param>
        /// <returns>ln(D / df), or 0 when no document contains the term.</returns>
        private static double getIDF(string term)
        {
            double documentCount = 0.0; // D: populated document slots, query excluded
            double df = 0.0;            // documents that contain the term

            for (int i = 1; i < docs.Length; i++)
            {
                string doc = docs[i];

                // docs is a fixed-size array; slots beyond the loaded titles stay null.
                if (doc == null)
                {
                    continue;
                }

                documentCount++;

                // NOTE(review): this is a substring test, so "cat" also matches
                // "category" - confirm whether whole-word matching is intended.
                if (doc.Contains(term))
                {
                    df++;
                }
            }

            if (df > 0)
            {
                return Math.Log(documentCount / df);
            }

            return 0.0;
        }

        /// <summary>
        /// Returns the cosine of the angle between two vectors:
        /// dot(v1, v2) / (|v1| * |v2|).
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector.</param>
        /// <returns>The cosine similarity, or 0 when either vector has zero length.</returns>
        public static double cosinetheta(double[] v1, double[] v2)
        {
            double lengthV1 = vectorlength(v1);
            double lengthV2 = vectorlength(v2);

            // Both lengths must be checked: a zero-length second vector would otherwise
            // produce 0/0 = NaN, which compares unequal to 0 and would be treated as a match.
            if (lengthV1 == 0 || lengthV2 == 0)
            {
                return 0;
            }

            return dotproduct(v1, v2) / (lengthV1 * lengthV2);
        }
    }
}

Posted 18-May-14 20:23pm

ayesha04

Updated 18-May-14 20:35pm

DamithSL

v2

Add a Solution

1 solution

Add a Solution

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

OriginalGriff · Accepted Answer · 2014-05-18T20:49:00

Solution 1

Seriously? You slap a pile of messy rubbish on a website and you expect us to sort it out for you? When you can't even be bothered to get rid of redundant code so we can see what is there? Or comment your code to make it simple for us, or even give us what we would need to run it?

We do not do your homework: it is set for a reason. It is there so that you think about what you have been told, and try to understand it. It is also there so that your tutor can identify areas where you are weak, and focus more attention on remedial action.

Try it yourself, or learn the Magic Words: "Do you want fries with that?"

Posted 18-May-14 20:49pm

OriginalGriff

Comments

Kornfeld Eliyahu Peter 19-May-14 2:50am

Grab a cup of coffee! You seem to be a bit angry this morning...

OriginalGriff 19-May-14 3:02am

You're probably right...:laugh:

Telstra 19-May-14 3:21am

I think @ayesha04 is new here. You should not reply with such an answer; after an experience like this she may never come back to this website.

Richard MacCutchan 19-May-14 3:43am

Looks like exactly the right answer to me. There are too many people posting rubbish like this and expecting someone else to do their work for them.

Telstra 19-May-14 3:57am

You are right, Richard. Our site is for that only. But if you look at the guidelines for submitting a solution, they clearly say "Let's work to help developers, not make them feel stupid." Not everyone in this world has the same level of understanding.
Thanks

OriginalGriff 19-May-14 4:04am

Did I make him feel stupid? Or did he make himself look stupid by posting this without thinking even slightly about what he was doing?

You can't mollycoddle everybody: sometimes you have to be blunt to get your point across. That isn't being rude, it's being accurate and trying to make the OP think about what he has done and why that would annoy or offend those he is asking for help. And it's all true: we are not here as a homework service and he does need to be aware of that. Trust me on this: he is lucky it was me, and not some of our less tolerant members... :laugh:

Richard MacCutchan 19-May-14 4:13am

some of our less tolerant members

Errm, who did you have in mind? :blush:

OriginalGriff 19-May-14 4:23am

Not you, but I think you can guess... :laugh:

Richard MacCutchan 19-May-14 4:14am

And what about the guideline for submitting questions, did the OP read that? Obviously not.